==> Azure-WALinuxAgent-2b21de5/.gitattributes <==
###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto
###############################################################################
# Set default behavior for command prompt diff.
#
# This is need for earlier builds of msysgit that does not have it on by
# default for csharp files.
# Note: This is only used by command line
###############################################################################
#*.cs diff=csharp
###############################################################################
# Set the merge driver for project and solution files
#
# Merging from the command prompt will add diff markers to the files if there
# are conflicts (Merging from VS is not affected by the settings below, in VS
# the diff markers are never inserted). Diff markers may cause the following
# file extensions to fail to load in VS. An alternative would be to treat
# these files as binary and thus will always conflict and require user
# intervention with every merge. To do so, just uncomment the entries below
###############################################################################
#*.sln merge=binary
#*.csproj merge=binary
#*.vbproj merge=binary
#*.vcxproj merge=binary
#*.vcproj merge=binary
#*.dbproj merge=binary
#*.fsproj merge=binary
#*.lsproj merge=binary
#*.wixproj merge=binary
#*.modelproj merge=binary
#*.sqlproj merge=binary
#*.wwaproj merge=binary
###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
#*.jpg binary
#*.png binary
#*.gif binary
###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. Turn it on by uncommenting the
# entries below.
###############################################################################
#*.doc diff=astextplain
#*.DOC diff=astextplain
#*.docx diff=astextplain
#*.DOCX diff=astextplain
#*.dot diff=astextplain
#*.DOT diff=astextplain
#*.pdf diff=astextplain
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain

==> Azure-WALinuxAgent-2b21de5/.github/CONTRIBUTING.md <==
# Contributing to Linux Guest Agent
First, thank you for contributing to the WALinuxAgent repository!
## Basics
If you would like to become an active contributor to this project, please follow the instructions provided in [Microsoft Azure Projects Contribution Guidelines](http://azure.github.io/guidelines/).
## Table of Contents
[Before starting](#before-starting)
- [Github basics](#github-basics)
- [Code of Conduct](#code-of-conduct)
[Making Changes](#making-changes)
- [Pull Requests](#pull-requests)
- [Pull Request Guidelines](#pull-request-guidelines)
- [Cleaning up commits](#cleaning-up-commits)
- [General guidelines](#general-guidelines)
- [Testing guidelines](#testing-guidelines)
## Before starting
### Github basics
#### GitHub workflow
If you don't have experience with Git and GitHub, some of the terminology and process can be confusing. [Here's a guide to understanding GitHub](https://guides.github.com/introduction/flow/).
#### Forking the Azure/WALinuxAgent repository
Unless you are working with multiple contributors on the same file, we ask that you fork the repository and submit your Pull Request from there. [Here's a guide to forks in Github](https://guides.github.com/activities/forking/).
### Code of Conduct
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
## Making Changes
### Pull Requests
You can find all of the pull requests that have been opened in the [Pull Requests](https://github.com/Azure/WALinuxAgent/pulls) section of the repository.
To open your own pull request, click [here](https://github.com/Azure/WALinuxAgent/compare). When creating a pull request, keep the following in mind:
- Make sure you are pointing to the fork and branch that your changes were made in
- Choose the correct branch you want your pull request to be merged into
- The pull request template that is provided **should be filled out**; this is not something that should just be deleted or ignored when the pull request is created
- Deleting or ignoring the template will delay the review of your pull request
### Pull Request Guidelines
A pull request template will automatically be included as a part of your PR. Please fill out the checklist as specified. Pull requests **will not be reviewed** unless they include a properly completed checklist.
#### Cleaning up Commits
If you are thinking about making a large change, **break up the change into small, logical, testable chunks, and organize your pull requests accordingly**.
Often when a pull request is created with a large number of files changed and/or a large number of lines of code added and/or removed, GitHub will have a difficult time displaying the changes on its site. This forces the WALinuxAgent team to use separate software to do a code review on the pull request.
If you find yourself creating a pull request and are unable to see all the changes on GitHub, we recommend **splitting the pull request into multiple pull requests that are able to be reviewed on GitHub**.
If splitting up the pull request is not an option, we recommend **creating individual commits for different parts of the pull request, which can be reviewed individually on GitHub**.
For more information on cleaning up the commits in a pull request, such as how to rebase, squash, and cherry-pick, click [here](https://github.com/Azure/azure-powershell/blob/dev/documentation/cleaning-up-commits.md).
#### General guidelines
The following guidelines must be followed in **EVERY** pull request that is opened.
- Title of the pull request is clear and informative
- There are a small number of commits that each have an informative message
- A description of the changes the pull request makes is included, and a reference to the issue being resolved, if the change addresses one
- All files have the Microsoft copyright header
#### Testing Guidelines
The following guidelines must be followed in **EVERY** pull request that is opened.
- Pull request includes test coverage for the included changes

==> Azure-WALinuxAgent-2b21de5/.github/ISSUE_TEMPLATE/bug_report.md <==
---
name: Bug report
about: Create a report to help us improve
title: "[BUG] Bug Title"
---
**Describe the bug: A clear and concise description of what the bug is.**
Note: Please add some context to help us understand the problem better, for example:
1. Section of the log where the error occurs.
2. Serial console output
3. Steps to reproduce the behavior.
**Distro and WALinuxAgent details (please complete the following information):**
- Distro and Version: [e.g. Ubuntu 16.04]
- WALinuxAgent version [e.g. 2.2.40, you can copy the output of `waagent --version`, more info [here](https://github.com/Azure/WALinuxAgent/wiki/FAQ#what-does-goal-state-agent-mean-in-waagent---version-output) ]
**Additional context**
Add any other context about the problem here.
**Log file attached**
If possible, please provide the full /var/log/waagent.log file to help us understand the problem better and get the context of the issue.

==> Azure-WALinuxAgent-2b21de5/.github/PULL_REQUEST_TEMPLATE.md <==
## Description
Issue #
---
### PR information
- [ ] The title of the PR is clear and informative.
- [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For information on cleaning up the commits in your pull request, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).
- [ ] If applicable, the PR references the bug/issue that it fixes in the description.
- [ ] New Unit tests were added for the changes made
### Quality of Code and Contribution Guidelines
- [ ] I have read the [contribution guidelines](https://github.com/Azure/WALinuxAgent/blob/master/.github/CONTRIBUTING.md).

==> Azure-WALinuxAgent-2b21de5/.github/codecov.yml <==
github_checks:
  annotations: false

==> Azure-WALinuxAgent-2b21de5/.github/workflows/ci_pr.yml <==
name: CI Unit tests
on:
  push:
    branches: [ "*" ]
  pull_request:
    branches: [ "*" ]
  workflow_dispatch:

jobs:
  test-python-2_6-and-3_4-versions:
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: 2.6
          - python-version: 3.4
    name: "Python ${{ matrix.python-version }} Unit Tests"
    runs-on: ubuntu-20.04
    container:
      image: ubuntu:16.04
      volumes:
        - /home/waagent:/home/waagent
    defaults:
      run:
        shell: bash -l {0}
    env:
      NOSEOPTS: "--verbose"
    steps:
      - uses: actions/checkout@v3
      - name: Install Python ${{ matrix.python-version }}
        run: |
          apt-get update
          apt-get install -y curl bzip2 sudo python3
          curl https://dcrdata.blob.core.windows.net/python/python-${{ matrix.python-version }}.tar.bz2 -o python-${{ matrix.python-version }}.tar.bz2
          sudo tar xjvf python-${{ matrix.python-version }}.tar.bz2 --directory /
      - name: Test with nosetests
        run: |
          if [[ ${{ matrix.python-version }} == 2.6 ]]; then
            source /home/waagent/virtualenv/python2.6.9/bin/activate
          else
            source /home/waagent/virtualenv/python3.4.8/bin/activate
          fi
          ./ci/nosetests.sh
          exit $?

  test-python-2_7:
    strategy:
      fail-fast: false
    name: "Python 2.7 Unit Tests"
    runs-on: ubuntu-20.04
    defaults:
      run:
        shell: bash -l {0}
    env:
      NOSEOPTS: "--verbose"
    steps:
      - uses: actions/checkout@v3
      - name: Install Python 2.7
        run: |
          apt-get update
          apt-get install -y curl bzip2 sudo
          curl https://dcrdata.blob.core.windows.net/python/python-2.7.tar.bz2 -o python-2.7.tar.bz2
          sudo tar xjvf python-2.7.tar.bz2 --directory /
      - name: Test with nosetests
        run: |
          source /home/waagent/virtualenv/python2.7.16/bin/activate
          ./ci/nosetests.sh
          exit $?

  test-current-python-versions:
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: 3.5
            PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e,makepkg.py"
          - python-version: 3.6
            PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e"
          - python-version: 3.7
            PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e"
          - python-version: 3.8
            PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e"
          - python-version: 3.9
            PYLINTOPTS: "--rcfile=ci/3.6.pylintrc"
            additional-nose-opts: "--with-coverage --cover-erase --cover-inclusive --cover-branches --cover-package=azurelinuxagent"
    name: "Python ${{ matrix.python-version }} Unit Tests"
    runs-on: ubuntu-20.04
    env:
      PYLINTOPTS: ${{ matrix.PYLINTOPTS }}
      PYLINTFILES: "azurelinuxagent setup.py makepkg.py tests tests_e2e"
      NOSEOPTS: "--with-timer ${{ matrix.additional-nose-opts }}"
      PYTHON_VERSION: ${{ matrix.python-version }}
    steps:
      - name: Checkout WALinuxAgent repo
        uses: actions/checkout@v3
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        id: install-dependencies
        run: |
          sudo env "PATH=$PATH" python -m pip install --upgrade pip
          sudo env "PATH=$PATH" pip install -r requirements.txt
          sudo env "PATH=$PATH" pip install -r test-requirements.txt
      - name: Run pylint
        run: |
          pylint $PYLINTOPTS --jobs=0 $PYLINTFILES
      - name: Test with nosetests
        if: success() || (failure() && steps.install-dependencies.outcome == 'success')
        run: |
          ./ci/nosetests.sh
          exit $?
      - name: Compile Coverage
        if: matrix.python-version == 3.9
        run: |
          echo looking for coverage files :
          ls -alh | grep -i coverage
          sudo env "PATH=$PATH" coverage combine coverage.*.data
          sudo env "PATH=$PATH" coverage xml
          sudo env "PATH=$PATH" coverage report
      - name: Upload Coverage
        if: matrix.python-version == 3.9
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml

==> Azure-WALinuxAgent-2b21de5/.gitignore <==
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# Virtualenv
py3env/
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyCharm
.idea/
.idea_modules/
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
waagentc
*.pyproj
*.sln
*.suo
bin/waagent2.0c
# rope project
.ropeproject/
# mac osx specific files
.DS_Store
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
pyvenv.cfg
.venv
pip-selfcheck.json
# virtualenv
venv/
ENV/
# dotenv
.env
# pyenv
.python-version
.vscode/

==> Azure-WALinuxAgent-2b21de5/CODEOWNERS <==
# See https://help.github.com/articles/about-codeowners/
# for more info about CODEOWNERS file
# It uses the same pattern rule for gitignore file
# https://git-scm.com/docs/gitignore#_pattern_format
# Provisioning Agent
# The Azure Linux Provisioning team is interested in getting notifications
# when there are requests for changes in the provisioning agent. For any
# questions, please feel free to reach out to thstring@microsoft.com.
/azurelinuxagent/pa/ @trstringer @anhvoms
/tests/pa/ @trstringer @anhvoms
#
# RDMA
#
/azurelinuxagent/common/rdma.py @longlimsft
/azurelinuxagent/pa/rdma/ @longlimsft
#
# Linux Agent team
#
* @narrieta @ZhidongPeng @nagworld9 @maddieford @gabstamsft

==> Azure-WALinuxAgent-2b21de5/LICENSE.txt <==
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016 Microsoft Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

==> Azure-WALinuxAgent-2b21de5/MAINTENANCE.md <==
## Microsoft Azure Linux Agent Maintenance Guide
### Version rules
* Production releases are public
* Test releases are for internal use
* Production versions use only [major].[minor].[revision]
* Test versions use [major].[minor].[revision].[build]
* Test a.b.c.0 is equivalent to Prod a.b.c
* Publishing to Production requires incrementing the revision and dropping the build number
* We do not use pre-release labels on any builds
### Version updates
* The version of the agent can be found at https://github.com/Azure/WALinuxAgent/blob/master/azurelinuxagent/common/version.py#L53 assigned to AGENT_VERSION
* Update the version there and submit a PR before declaring a release via GitHub

==> Azure-WALinuxAgent-2b21de5/MANIFEST <==
# file GENERATED by distutils, do NOT edit
README
setup.py
bin/waagent
config/waagent.conf
config/waagent.logrotate
test/test_logger.py
walinuxagent/__init__.py
walinuxagent/agent.py
walinuxagent/conf.py
walinuxagent/envmonitor.py
walinuxagent/extension.py
walinuxagent/install.py
walinuxagent/logger.py
walinuxagent/protocol.py
walinuxagent/provision.py
walinuxagent/util.py

==> Azure-WALinuxAgent-2b21de5/MANIFEST.in <==
recursive-include bin *
recursive-include init *
recursive-include config *

==> Azure-WALinuxAgent-2b21de5/NOTICE <==
Microsoft Azure Linux Agent
Copyright 2012 Microsoft Corporation
This product includes software developed at
Microsoft Corporation (http://www.microsoft.com/).

==> Azure-WALinuxAgent-2b21de5/README.md <==
# Microsoft Azure Linux Agent
## Linux distributions support
Our daily automation tests most of the [Linux distributions supported by Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/endorsed-distros); the Agent can be
used on other distributions as well, but development, testing and support for those are done by the open source community.
Testing is done using the develop branch, which can be unstable. For a stable build please use the master branch instead.
[](https://codecov.io/gh/Azure/WALinuxAgent/branch/develop)
## Introduction
The Microsoft Azure Linux Agent (waagent) manages Linux provisioning and VM interaction with the Azure Fabric Controller. It provides the following
functionality for Linux IaaS deployments:
* Image Provisioning
* Creation of a user account
* Configuring SSH authentication types
* Deployment of SSH public keys and key pairs
* Setting the host name
* Publishing the host name to the platform DNS
* Reporting SSH host key fingerprint to the platform
* Resource Disk Management
* Formatting and mounting the resource disk
* Configuring swap space
* Networking
* Manages routes to improve compatibility with platform DHCP servers
* Ensures the stability of the network interface name
* Kernel
* Configure virtual NUMA (disable for kernel <2.6.37)
* Configure SCSI timeouts for the root device (which could be remote)
* Diagnostics
* Console redirection to the serial port
* SCVMM Deployments
* Detect and bootstrap the VMM agent for Linux when running in a System
Center Virtual Machine Manager 2012R2 environment
* VM Extension
* Inject components authored by Microsoft and partners into Linux VMs (IaaS)
to enable software and configuration automation
* VM Extension reference implementation on [GitHub](https://github.com/Azure/azure-linux-extensions)
## Communication
The information flow from the platform to the agent occurs via two channels:
* A boot-time attached DVD for IaaS deployments.
This DVD includes an OVF-compliant configuration file that includes all
provisioning information other than the actual SSH keypairs.
* A TCP endpoint exposing a REST API used to obtain deployment and topology
configuration.
### HTTP Proxy
The Agent will use an HTTP proxy if provided via the `http_proxy` (for `http` requests) or
`https_proxy` (for `https` requests) environment variables. Due to limitations of Python,
the agent *does not* support HTTP proxies requiring authentication.
Similarly, the Agent will bypass the proxy if the environment variable `no_proxy` is set.
Note that the way to define those environment variables for the Agent service varies across different distros. For distros
that use systemd, a common approach is to use Environment or EnvironmentFile in the [Service] section of the service
definition, for example using an override or a drop-in file (see "systemctl edit" for overrides).
Example:
```bash
# cat /etc/systemd/system/walinuxagent.service.d/http-proxy.conf
[Service]
Environment="http_proxy=http://proxy.example.com:80/"
Environment="https_proxy=http://proxy.example.com:80/"
#
```
The Agent passes its environment to the VM Extensions it executes, including `http_proxy` and `https_proxy`, so defining
a proxy for the Agent will also define it for the VM Extensions.
The [`HttpProxy.Host` and `HttpProxy.Port`](#httpproxyhost-httpproxyport) configuration variables, if used, override
the environment settings. Note that these configuration variables are local to the Agent process and are not
passed to VM Extensions.
## Requirements
The following systems have been tested and are known to work with the Azure
Linux Agent. Please note that this list may differ from the official list
of supported systems on the Microsoft Azure Platform as described [here](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/endorsed-distros).
Waagent depends on some system packages in order to function properly:
* Python 2.6+
* OpenSSL 1.0+
* OpenSSH 5.3+
* Filesystem utilities: sfdisk, fdisk, mkfs, parted
* Password tools: chpasswd, sudo
* Text processing tools: sed, grep
* Network tools: ip-route
## Installation
Installing via your distribution's package repository is the only method that is supported.
You can install from source for more advanced options, such as installing to a custom location or creating
custom images. Installing from source, though, may override customizations done to the Agent by your
distribution, and is meant only for advanced users. We provide very limited support for this method.
To install from source, you can use **setuptools**:
```bash
sudo python setup.py install --register-service
```
For Python 3, use:
```bash
sudo python3 setup.py install --register-service
```
You can view more installation options by running:
```bash
sudo python setup.py install --help
```
The agent's log file is kept at `/var/log/waagent.log`.
Lastly, you can also customize your own RPM or DEB packages using the configuration
samples provided in the deb and rpm sections below. This method is also meant for advanced users and we
provide very limited support for it.
## Upgrade
Upgrading via your distribution's package repository or using automatic updates are the only supported
methods. More information can be found here: [Update Linux Agent](https://learn.microsoft.com/en-us/azure/virtual-machines/extensions/update-linux-agent)
To upgrade the Agent from source, you can use **setuptools**. Upgrading from source is meant for advanced
users and we provide very limited support for it.
```bash
sudo python setup.py install --force
```
Restart the waagent service. For most Linux distributions:
```bash
sudo service waagent restart
```
For Ubuntu, use:
```bash
sudo service walinuxagent restart
```
For CoreOS, use:
```bash
sudo systemctl restart waagent
```
## Command line options
### Flags
`-verbose`: Increase verbosity of specified command
`-force`: Skip interactive confirmation for some commands
### Commands
`-help`: Lists the supported commands and flags.
`-deprovision`: Attempt to clean the system and make it suitable for re-provisioning, by deleting the following:
* All SSH host keys (if Provisioning.RegenerateSshHostKeyPair is 'y' in the configuration file)
* Nameserver configuration in /etc/resolv.conf
* Root password from /etc/shadow (if Provisioning.DeleteRootPassword is 'y' in the configuration file)
* Cached DHCP client leases
* Resets host name to localhost.localdomain
**WARNING!** Deprovision does not guarantee that the image is cleared of all sensitive information and suitable for redistribution.
`-deprovision+user`: Performs everything under deprovision (above) and also deletes the last provisioned user account and associated data.
`-version`: Displays the version of waagent
`-serialconsole`: Configures GRUB to mark ttyS0 (the first serial port) as the boot console. This ensures that kernel bootup logs are sent to the serial port and made available for debugging.
`-daemon`: Run waagent as a daemon to manage interaction with the platform. This argument is specified to waagent in the waagent init script.
`-start`: Run waagent as a background process
`-collect-logs [-full]`: Runs the log collector utility that collects relevant agent logs for debugging and stores them in the agent folder on disk. Exact location will be shown when run. Use flag `-full` for more exhaustive log collection.
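For reference, here are a few illustrative invocations of the commands described above (a sketch only; required privileges and exact output vary by distro):

```bash
# Show the version of the installed agent
waagent -version

# Deprovision the VM, also deleting the last provisioned user account;
# -force skips the interactive confirmation
sudo waagent -deprovision+user -force

# Collect an exhaustive set of agent logs for debugging
sudo waagent -collect-logs -full
```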
## Configuration
A configuration file (/etc/waagent.conf) controls the actions of waagent. Blank lines and lines whose first character is a `#` are ignored (end-of-line comments are *not* supported).
A sample configuration file is shown below:
```yml
Extensions.Enabled=y
Extensions.GoalStatePeriod=6
Provisioning.Agent=auto
Provisioning.DeleteRootPassword=n
Provisioning.RegenerateSshHostKeyPair=y
Provisioning.SshHostKeyPairType=rsa
Provisioning.MonitorHostName=y
Provisioning.DecodeCustomData=n
Provisioning.ExecuteCustomData=n
Provisioning.PasswordCryptId=6
Provisioning.PasswordCryptSaltLength=10
ResourceDisk.Format=y
ResourceDisk.Filesystem=ext4
ResourceDisk.MountPoint=/mnt/resource
ResourceDisk.MountOptions=None
ResourceDisk.EnableSwap=n
ResourceDisk.EnableSwapEncryption=n
ResourceDisk.SwapSizeMB=0
Logs.Verbose=n
Logs.Collect=y
Logs.CollectPeriod=3600
OS.AllowHTTP=n
OS.RootDeviceScsiTimeout=300
OS.EnableFIPS=n
OS.OpensslPath=None
OS.SshClientAliveInterval=180
OS.SshDir=/etc/ssh
HttpProxy.Host=None
HttpProxy.Port=None
```
The various configuration options are described in detail below. Configuration
options are of three types: Boolean, String, or Integer. The Boolean
configuration options can be specified as "y" or "n". The special keyword "None"
may be used for some string type configuration entries as detailed below.
### Configuration File Options
#### __Extensions.Enabled__
_Type: Boolean_
_Default: y_
This allows the user to enable or disable the extension handling functionality in the
agent. Valid values are "y" or "n". If extension handling is disabled, the goal state
will still be processed and VM status is still reported, but only every 5 minutes.
Extension config within the goal state will be ignored. Note that functionality such
as password reset, ssh key updates and backups depend on extensions. Only disable this
if you do not need extensions at all.
_Note_: disabling extensions in this manner is not the same as running completely
without the agent. In order to do that, the `provisionVMAgent` flag must be set at
provisioning time, via whichever API is being used. We will provide more details on
this on our wiki when it is generally available.
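As an illustration, with the Azure CLI the flag can be set when the VM is created. This is a hypothetical sketch (the resource group, VM name, and image are placeholders), and other APIs expose the same `provisionVMAgent` setting differently:

```bash
# Hypothetical example: create a VM with guest agent provisioning disabled.
# --enable-agent corresponds to the provisionVMAgent flag.
az vm create \
    --resource-group myResourceGroup \
    --name myVM \
    --image Ubuntu2204 \
    --enable-agent false
```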
#### __Extensions.WaitForCloudInit__
_Type: Boolean_
_Default: n_
Waits for cloud-init to complete (cloud-init status --wait) before executing VM extensions.
Both cloud-init and VM extensions are common ways to customize a VM during initial deployment. By
default, the agent will start executing extensions while cloud-init may still be in the 'config'
stage and won't wait for the 'final' stage to complete. Cloud-init and extensions may execute operations
that conflict with each other (for example, both of them may try to install packages). Setting this option
to 'y' ensures that VM extensions are executed only after cloud-init has completed all its stages.
Note that using this option requires creating a custom image with the value of this option set to 'y', in
order to ensure that the wait is performed during the initial deployment of the VM.
#### __Extensions.WaitForCloudInitTimeout__
_Type: Integer_
_Default: 3600_
Timeout in seconds for the Agent to wait on cloud-init. If the timeout elapses, the Agent will continue
executing VM extensions. See Extensions.WaitForCloudInit for more details.
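Taken together, a custom image that should hold off extension execution until cloud-init finishes could carry a waagent.conf fragment like the following (illustrative; 3600 is the documented default timeout):

```yml
Extensions.WaitForCloudInit=y
Extensions.WaitForCloudInitTimeout=3600
```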
#### __Extensions.GoalStatePeriod__
_Type: Integer_
_Default: 6_
How often to poll for new goal states (in seconds) and report the status of the VM
and extensions. Goal states describe the desired state of the extensions on the VM.
_Note_: setting this parameter to more than a few minutes can cause the state of
the VM to be reported as unresponsive/unavailable on the Azure portal. This
setting also affects how quickly the agent starts executing extensions.
#### __AutoUpdate.UpdateToLatestVersion__
_Type: Boolean_
_Default: y_
Enables auto-update of the Extension Handler. The Extension Handler is responsible
for managing extensions and reporting VM status. The core functionality of the agent
is contained in the Extension Handler, and we encourage users to enable this option
in order to maintain an up to date version.
When this option is enabled, the Agent will install new versions when they become
available. When disabled, the Agent will not install any new versions, but it will use
the most recent version already installed on the VM.
_Notes_:
1. This option was added on version 2.10.0.8 of the Agent. For previous versions, see AutoUpdate.Enabled.
2. If both options are specified in waagent.conf, AutoUpdate.UpdateToLatestVersion overrides the value set for AutoUpdate.Enabled.
3. Changing this config option requires a service restart to pick up the updated setting.
For more information on the agent version, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#what-does-goal-state-agent-mean-in-waagent---version-output).
For more information on the agent update, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#how-auto-update-works-for-extension-handler).
For more information on the AutoUpdate.UpdateToLatestVersion vs AutoUpdate.Enabled, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion).
#### __AutoUpdate.Enabled__
_Type: Boolean_
_Default: y_
Enables auto-update of the Extension Handler. This flag is supported for legacy reasons and we strongly recommend using AutoUpdate.UpdateToLatestVersion instead.
The difference between these 2 flags is that, when set to 'n', AutoUpdate.Enabled will use the version of the Extension Handler that is pre-installed on the image, while AutoUpdate.UpdateToLatestVersion will use the most recent version that has already been installed on the VM (via auto-update).
On most distros the default value is 'y'.
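For example, to pin the Agent to the most recent Extension Handler version already installed on the VM, a waagent.conf entry such as the following could be used (an illustrative sketch; as noted above, changing it requires a service restart):

```yml
AutoUpdate.UpdateToLatestVersion=n
```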
#### __Provisioning.Agent__
_Type: String_
_Default: auto_
Choose which provisioning agent to use (or allow waagent to figure it out by
specifying "auto"). Possible options are "auto" (default), "waagent", "cloud-init",
or "disabled".
#### __Provisioning.Enabled__ (*removed in 2.2.45*)
_Type: Boolean_
_Default: y_
This allows the user to enable or disable the provisioning functionality in the
agent. Valid values are "y" or "n". If provisioning is disabled, SSH host and
user keys in the image are preserved and any configuration specified in the
Azure provisioning API is ignored.
_Note_: This configuration option has been removed and has no effect. waagent
now auto-detects cloud-init as a provisioning agent (with an option to override
with `Provisioning.Agent`).
#### __Provisioning.MonitorHostName__
_Type: Boolean_
_Default: n_
If set, waagent will monitor the Linux VM for hostname changes (as returned by
the "hostname" command) and publish the change via DHCP requests, automatically
updating the networking configuration in the image to reflect the change. In
order to push the name change to the DNS servers, networking will be restarted
in the VM, resulting in a brief loss of Internet connectivity.
#### __Provisioning.MonitorHostNamePeriod__
_Type: Integer_
_Default: 30_
How often to monitor host name changes (in seconds). This setting is ignored if
MonitorHostName is not set.
#### __Provisioning.UseCloudInit__
_Type: Boolean_
_Default: n_
This option enables/disables support for provisioning by means of cloud-init.
When true ("y"), the agent will wait for cloud-init to complete before installing
extensions and processing the latest goal state. _Provisioning.Enabled_ must be
disabled ("n") for this option to have an effect. Setting _Provisioning.Enabled_ to
true ("y") overrides this option and runs the built-in agent provisioning code.
_Note_: This configuration option has been removed and has no effect. waagent
now auto-detects cloud-init as a provisioning agent (with an option to override
with `Provisioning.Agent`).
#### __Provisioning.DeleteRootPassword__
_Type: Boolean_
_Default: n_
If set, the root password in the /etc/shadow file is erased during the
provisioning process.
#### __Provisioning.RegenerateSshHostKeyPair__
_Type: Boolean_
_Default: y_
If set, all SSH host key pairs (ecdsa, dsa and rsa) are deleted during the
provisioning process from /etc/ssh/, and a single fresh key pair is generated.
The encryption type for the fresh key pair is configurable by the
Provisioning.SshHostKeyPairType entry. Please note that some distributions will
re-create SSH key pairs for any missing encryption types when the SSH daemon is
restarted (for example, upon a reboot).
#### __Provisioning.SshHostKeyPairType__
_Type: String_
_Default: rsa_
This can be set to an encryption algorithm type that is supported by the SSH
daemon on the VM. The typically supported values are "rsa", "dsa" and "ecdsa".
Note that "putty.exe" on Windows does not support "ecdsa". So, if you intend to
use putty.exe on Windows to connect to a Linux deployment, please use "rsa" or
"dsa".
#### __Provisioning.DecodeCustomData__
_Type: Boolean_
_Default: n_
If set, waagent will decode CustomData from Base64.
#### __Provisioning.ExecuteCustomData__
_Type: Boolean_
_Default: n_
If set, waagent will execute CustomData after provisioning.
#### __Provisioning.PasswordCryptId__
_Type: String_
_Default: 6_
Algorithm used by crypt when generating password hash.
* 1 - MD5
* 2a - Blowfish
* 5 - SHA-256
* 6 - SHA-512
#### __Provisioning.PasswordCryptSaltLength__
_Type: String_
_Default: 10_
Length of random salt used when generating password hash.
#### __ResourceDisk.Format__
_Type: Boolean_
_Default: y_
If set, the resource disk provided by the platform will be formatted and mounted by waagent if the filesystem type requested by the user in "ResourceDisk.Filesystem" is anything other than "ntfs". A single partition of
type Linux (83) will be made available on the disk. Note that this partition will not be formatted if it can be successfully mounted.
#### __ResourceDisk.Filesystem__
_Type: String_
_Default: ext4_
This specifies the filesystem type for the resource disk. Supported values vary
by Linux distribution. If the string is X, then mkfs.X should be present on the
Linux image. SLES 11 images should typically use 'ext3'. BSD images should use
'ufs2' here.
#### __ResourceDisk.MountPoint__
_Type: String_
_Default: /mnt/resource_
This specifies the path at which the resource disk is mounted.
#### __ResourceDisk.MountOptions__
_Type: String_
_Default: None_
Specifies disk mount options to be passed to the mount -o command. This is a
comma-separated list of values, e.g. 'nodev,nosuid'. See mount(8) for details.
#### __ResourceDisk.EnableSwap__
_Type: Boolean_
_Default: n_
If set, a swap file (/swapfile) is created on the resource disk and added to the
system swap space.
#### __ResourceDisk.EnableSwapEncryption__
_Type: Boolean_
_Default: n_
If set, the swap file (/swapfile) is mounted as an encrypted filesystem (flag supported only on FreeBSD.)
#### __ResourceDisk.SwapSizeMB__
_Type: Integer_
_Default: 0_
The size of the swap file in megabytes.
#### __Logs.Verbose__
_Type: Boolean_
_Default: n_
If set, log verbosity is boosted. Waagent logs to /var/log/waagent.log and
leverages the system logrotate functionality to rotate logs.
#### __Logs.Collect__
_Type: Boolean_
_Default: y_
If set, agent logs will be periodically collected and uploaded to a secure location for improved supportability.
NOTE: This feature relies on the agent's resource usage features (cgroups); this flag has no effect on distros where cgroups are not supported.
#### __Logs.CollectPeriod__
_Type: Integer_
_Default: 3600_
This configures how frequently to collect and upload logs. The default is one hour (3600 seconds).
NOTE: This only takes effect if the Logs.Collect option is enabled.
#### __OS.AllowHTTP__
_Type: Boolean_
_Default: n_
If SSL support is not compiled into Python, the agent will fail all HTTPS requests.
You can set this option to 'y' to make the agent fall back to HTTP, instead of failing the requests.
NOTE: Allowing HTTP may unintentionally expose secure data.
#### __OS.EnableRDMA__
_Type: Boolean_
_Default: n_
If set, the agent will attempt to install and then load an RDMA kernel driver
that matches the version of the firmware on the underlying hardware.
#### __OS.EnableFIPS__
_Type: Boolean_
_Default: n_
If set, the agent will emit into the environment "OPENSSL_FIPS=1" when executing
OpenSSL commands. This signals OpenSSL to use any installed FIPS-compliant libraries.
Note that the agent itself has no FIPS-specific code. _If no FIPS-compliant certificates are
installed, then enabling this option will cause all OpenSSL commands to fail._
#### __OS.MonitorDhcpClientRestartPeriod__
_Type: Integer_
_Default: 30_
The agent monitors restarts of the DHCP client and restores network rules when a restart occurs. This
setting determines how often (in seconds) to monitor for restarts.
#### __OS.RootDeviceScsiTimeout__
_Type: Integer_
_Default: 300_
This configures the SCSI timeout in seconds on the root device. If not set, the
system defaults are used.
#### __OS.RootDeviceScsiTimeoutPeriod__
_Type: Integer_
_Default: 30_
How often to set the SCSI timeout on the root device (in seconds). This setting is
ignored if RootDeviceScsiTimeout is not set.
#### __OS.OpensslPath__
_Type: String_
_Default: None_
This can be used to specify an alternate path for the openssl binary to use for
cryptographic operations.
#### __OS.RemovePersistentNetRulesPeriod__
_Type: Integer_
_Default: 30_
How often (in seconds) to remove the udev rules for persistent network interface names
(75-persistent-net-generator.rules and /etc/udev/rules.d/70-persistent-net.rules).
#### __OS.SshClientAliveInterval__
_Type: Integer_
_Default: 180_
This value sets the number of seconds the agent uses for the SSH ClientAliveInterval configuration option.
#### __OS.SshDir__
_Type: String_
_Default: `/etc/ssh`_
This option can be used to override the normal location of the SSH configuration
directory.
#### __HttpProxy.Host, HttpProxy.Port__
_Type: String_
_Default: None_
If set, the agent will use this proxy server for HTTP/HTTPS requests. These values
*will* override the `http_proxy` or `https_proxy` environment variables. Lastly,
`HttpProxy.Host` is required (if a proxy is to be used) and `HttpProxy.Port` is optional.
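A minimal waagent.conf sketch, assuming a proxy listening at proxy.example.com:3128 (both values are placeholders):

```yml
HttpProxy.Host=proxy.example.com
HttpProxy.Port=3128
```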
#### __CGroups.EnforceLimits__
_Type: Boolean_
_Default: y_
If set, the agent will attempt to set cgroups limits for cpu and memory for the agent process itself
as well as extension processes. See the wiki for further details on this.
#### __CGroups.Excluded__
_Type: String_
_Default: customscript,runcommand_
The comma-separated list of extensions to exclude from cgroups limits.
### Telemetry
WALinuxAgent collects usage data and sends it to Microsoft to help improve our products and services. The data collected is used to track service health and
assist with Azure support requests. Data collected does not include any personally identifiable information. Read our [privacy statement](http://go.microsoft.com/fwlink/?LinkId=521839)
to learn more.
WALinuxAgent does not support disabling telemetry at this time. WALinuxAgent must be removed to disable telemetry collection. If you need this feature,
please open an issue in GitHub and explain your requirement.
### Appendix
We do not maintain packaging information in this repo but some samples are shown below as a reference. See the downstream distribution repositories for officially maintained packaging.
#### deb packages
The official Ubuntu WALinuxAgent package can be found [here](https://launchpad.net/ubuntu/+source/walinuxagent).
Run once:
1. Install required packages
```bash
sudo apt-get -y install ubuntu-dev-tools pbuilder python-all debhelper
```
2. Create the pbuilder environment
```bash
sudo pbuilder create --debootstrapopts --variant=buildd
```
3. Obtain `waagent.dsc` from a downstream package repo
To compile the package, from the top-most directory:
1. Build the source package
```bash
dpkg-buildpackage -S
```
2. Build the package
```bash
sudo pbuilder build waagent.dsc
```
3. Fetch the built package, usually from `/var/cache/pbuilder/result`
#### rpm packages
The instructions below describe how to build an rpm package.
1. Install setuptools
```bash
curl https://bootstrap.pypa.io/ez_setup.py -o - | python
```
2. The following command will build the binary and source RPMs:
```bash
python setup.py bdist_rpm
```
-----
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.

==> Azure-WALinuxAgent-2b21de5/SECURITY.md <==
## Security
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.**
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
## Preferred Languages
We prefer all communications to be in English.
## Policy
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).

==> Azure-WALinuxAgent-2b21de5/__main__.py <==
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import azurelinuxagent.agent as agent
agent.main()

==> Azure-WALinuxAgent-2b21de5/azurelinuxagent/__init__.py <==
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#

==> Azure-WALinuxAgent-2b21de5/azurelinuxagent/agent.py <==
# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
"""
Module agent
"""
from __future__ import print_function
import os
import re
import subprocess
import sys
import threading
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.event as event
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.logcollector import LogCollector, OUTPUT_RESULTS_FILE_PATH
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import fileutil, textutil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.utils.networkutil import AddFirewallRules
from azurelinuxagent.common.version import AGENT_NAME, AGENT_LONG_VERSION, AGENT_VERSION, \
DISTRO_NAME, DISTRO_VERSION, \
PY_VERSION_MAJOR, PY_VERSION_MINOR, \
PY_VERSION_MICRO, GOAL_STATE_AGENT_VERSION, \
get_daemon_version, set_daemon_version
from azurelinuxagent.ga.collect_logs import CollectLogsHandler, get_log_collector_monitor_handler
from azurelinuxagent.pa.provision.default import ProvisionHandler
class AgentCommands(object):
"""
This is the list of all commands that the Linux Guest Agent supports
"""
DeprovisionUser = "deprovision+user"
Deprovision = "deprovision"
Daemon = "daemon"
Start = "start"
RegisterService = "register-service"
RunExthandlers = "run-exthandlers"
Version = "version"
ShowConfig = "show-configuration"
Help = "help"
CollectLogs = "collect-logs"
SetupFirewall = "setup-firewall"
Provision = "provision"
class Agent(object):
def __init__(self, verbose, conf_file_path=None):
"""
Initialize agent running environment.
"""
self.conf_file_path = conf_file_path
self.osutil = get_osutil()
# Init stdout log
level = logger.LogLevel.VERBOSE if verbose else logger.LogLevel.INFO
logger.add_logger_appender(logger.AppenderType.STDOUT, level)
# Init config
conf_file_path = self.conf_file_path \
if self.conf_file_path is not None \
else self.osutil.get_agent_conf_file_path()
conf.load_conf_from_file(conf_file_path)
# Init log
verbose = verbose or conf.get_logs_verbose()
level = logger.LogLevel.VERBOSE if verbose else logger.LogLevel.INFO
logger.add_logger_appender(logger.AppenderType.FILE, level, path=conf.get_agent_log_file())
# echo the log to /dev/console if the machine will be provisioned
if conf.get_logs_console() and not ProvisionHandler.is_provisioned():
self.__add_console_appender(level)
if event.send_logs_to_telemetry():
logger.add_logger_appender(logger.AppenderType.TELEMETRY,
logger.LogLevel.WARNING,
path=event.add_log_event)
ext_log_dir = conf.get_ext_log_dir()
try:
if os.path.isfile(ext_log_dir):
raise Exception("{0} is a file".format(ext_log_dir))
if not os.path.isdir(ext_log_dir):
fileutil.mkdir(ext_log_dir, mode=0o755, owner=self.osutil.get_root_username())
except Exception as e:
logger.error(
"Exception occurred while creating extension "
"log directory {0}: {1}".format(ext_log_dir, e))
# Init event reporter
# Note that the reporter is not fully initialized here yet. Some telemetry fields are filled with data
# originating from the goal state or IMDS, which requires a WireProtocol instance. Once a protocol
# has been established, those fields must be explicitly initialized using
# initialize_event_logger_vminfo_common_parameters(). Any events created before that initialization
# will contain dummy values on those fields.
event.init_event_status(conf.get_lib_dir())
event_dir = os.path.join(conf.get_lib_dir(), event.EVENTS_DIRECTORY)
event.init_event_logger(event_dir)
event.enable_unhandled_err_dump("WALA")
def __add_console_appender(self, level):
logger.add_logger_appender(logger.AppenderType.CONSOLE, level, path="/dev/console")
def daemon(self):
"""
Run agent daemon
"""
set_daemon_version(AGENT_VERSION)
logger.set_prefix("Daemon")
threading.current_thread().setName("Daemon")
child_args = None \
if self.conf_file_path is None \
else "-configuration-path:{0}".format(self.conf_file_path)
from azurelinuxagent.daemon import get_daemon_handler
daemon_handler = get_daemon_handler()
daemon_handler.run(child_args=child_args)
def provision(self):
"""
Run provision command
"""
from azurelinuxagent.pa.provision import get_provision_handler
provision_handler = get_provision_handler()
provision_handler.run()
def deprovision(self, force=False, deluser=False):
"""
Run deprovision command
"""
from azurelinuxagent.pa.deprovision import get_deprovision_handler
deprovision_handler = get_deprovision_handler()
deprovision_handler.run(force=force, deluser=deluser)
def register_service(self):
"""
Register agent as a service
"""
print("Register {0} service".format(AGENT_NAME))
self.osutil.register_agent_service()
print("Stop {0} service".format(AGENT_NAME))
self.osutil.stop_agent_service()
print("Start {0} service".format(AGENT_NAME))
self.osutil.start_agent_service()
def run_exthandlers(self, debug=False):
"""
Run the update and extension handler
"""
logger.set_prefix("ExtHandler")
threading.current_thread().setName("ExtHandler")
#
# Agents < 2.2.53 used to echo the log to the console. Since the extension handler could have been started by
# one of those daemons, output a message indicating that output to the console will stop, otherwise users
# may think that the agent died if they noticed that output to the console stops abruptly.
#
# Feel free to remove this code if telemetry shows there are no more agents <= 2.2.53 in the field.
#
if conf.get_logs_console() and get_daemon_version() < FlexibleVersion("2.2.53"):
self.__add_console_appender(logger.LogLevel.INFO)
try:
logger.info(u"The agent will now check for updates and then will process extensions. Output to /dev/console will be suspended during those operations.")
finally:
logger.disable_console_output()
from azurelinuxagent.ga.update import get_update_handler
update_handler = get_update_handler()
update_handler.run(debug)
def show_configuration(self):
configuration = conf.get_configuration()
for k in sorted(configuration.keys()):
print("{0} = {1}".format(k, configuration[k]))
def collect_logs(self, is_full_mode):
logger.set_prefix("LogCollector")
if is_full_mode:
logger.info("Running log collector mode full")
else:
logger.info("Running log collector mode normal")
# Check the cgroups unit
log_collector_monitor = None
cgroups_api = SystemdCgroupsApi()
cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self")
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)
if not cpu_slice_matches or not memory_slice_matches:
logger.info("The Log Collector process is not in the proper cgroups:")
if not cpu_slice_matches:
logger.info("\tunexpected cpu slice")
if not memory_slice_matches:
logger.info("\tunexpected memory slice")
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path):
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path)
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup)
logger.info(msg)
cpu_cgroup.initialize_cpu_usage()
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
logger.info(msg)
return [cpu_cgroup, memory_cgroup]
try:
log_collector = LogCollector(is_full_mode)
            # Run log collector resource (CPU, memory) monitoring only if the agent started the
            # log collector. If the log collector was started by any other means, it is not monitored.
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path)
log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups)
log_collector_monitor.run()
archive = log_collector.collect_logs_and_get_archive()
logger.info("Log collection successfully completed. Archive can be found at {0} "
"and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH))
except Exception as e:
logger.error("Log collection completed unsuccessfully. Error: {0}".format(ustr(e)))
logger.info("Detailed log output can be found at {0}".format(OUTPUT_RESULTS_FILE_PATH))
sys.exit(1)
finally:
if log_collector_monitor is not None:
log_collector_monitor.stop()
@staticmethod
def setup_firewall(firewall_metadata):
print("Setting up firewall for the WALinux Agent with args: {0}".format(firewall_metadata))
try:
AddFirewallRules.add_iptables_rules(firewall_metadata['wait'], firewall_metadata['dst_ip'],
firewall_metadata['uid'])
print("Successfully set the firewall rules")
except Exception as error:
print("Unable to add firewall rules. Error: {0}".format(ustr(error)))
sys.exit(1)
def main(args=None):
"""
Parse command line arguments, exit with usage() on error.
Invoke different methods according to different command
"""
if args is None:
args = []
if len(args) <= 0:
args = sys.argv[1:]
command, force, verbose, debug, conf_file_path, log_collector_full_mode, firewall_metadata = parse_args(args)
if command == AgentCommands.Version:
version()
elif command == AgentCommands.Help:
print(usage())
elif command == AgentCommands.Start:
start(conf_file_path=conf_file_path)
else:
try:
agent = Agent(verbose, conf_file_path=conf_file_path)
if command == AgentCommands.DeprovisionUser:
agent.deprovision(force, deluser=True)
elif command == AgentCommands.Deprovision:
agent.deprovision(force, deluser=False)
elif command == AgentCommands.Provision:
agent.provision()
elif command == AgentCommands.RegisterService:
agent.register_service()
elif command == AgentCommands.Daemon:
agent.daemon()
elif command == AgentCommands.RunExthandlers:
agent.run_exthandlers(debug)
elif command == AgentCommands.ShowConfig:
agent.show_configuration()
elif command == AgentCommands.CollectLogs:
agent.collect_logs(log_collector_full_mode)
elif command == AgentCommands.SetupFirewall:
agent.setup_firewall(firewall_metadata)
except Exception as e:
logger.error(u"Failed to run '{0}': {1}",
command,
textutil.format_exception(e))
def parse_args(sys_args):
"""
Parse command line arguments
"""
cmd = AgentCommands.Help
force = False
verbose = False
debug = False
conf_file_path = None
log_collector_full_mode = False
firewall_metadata = {
"dst_ip": None,
"uid": None,
"wait": ""
}
regex_cmd_format = "^([-/]*){0}"
for arg in sys_args:
if arg == "":
# Don't parse an empty parameter
continue
m = re.match("^(?:[-/]*)configuration-path:([\w/\.\-_]+)", arg) # pylint: disable=W1401
        if m is not None:
conf_file_path = m.group(1)
if not os.path.exists(conf_file_path):
print("Error: Configuration file {0} does not exist".format(
conf_file_path), file=sys.stderr)
print(usage())
sys.exit(1)
elif re.match("^([-/]*)deprovision\\+user", arg):
cmd = AgentCommands.DeprovisionUser
elif re.match(regex_cmd_format.format(AgentCommands.Deprovision), arg):
cmd = AgentCommands.Deprovision
elif re.match(regex_cmd_format.format(AgentCommands.Daemon), arg):
cmd = AgentCommands.Daemon
elif re.match(regex_cmd_format.format(AgentCommands.Start), arg):
cmd = AgentCommands.Start
elif re.match(regex_cmd_format.format(AgentCommands.RegisterService), arg):
cmd = AgentCommands.RegisterService
elif re.match(regex_cmd_format.format(AgentCommands.RunExthandlers), arg):
cmd = AgentCommands.RunExthandlers
elif re.match(regex_cmd_format.format(AgentCommands.Version), arg):
cmd = AgentCommands.Version
elif re.match(regex_cmd_format.format("verbose"), arg):
verbose = True
elif re.match(regex_cmd_format.format("debug"), arg):
debug = True
elif re.match(regex_cmd_format.format("force"), arg):
force = True
elif re.match(regex_cmd_format.format(AgentCommands.ShowConfig), arg):
cmd = AgentCommands.ShowConfig
elif re.match("^([-/]*)(help|usage|\\?)", arg):
cmd = AgentCommands.Help
elif re.match(regex_cmd_format.format(AgentCommands.CollectLogs), arg):
cmd = AgentCommands.CollectLogs
elif re.match(regex_cmd_format.format("full"), arg):
log_collector_full_mode = True
elif re.match(regex_cmd_format.format(AgentCommands.SetupFirewall), arg):
cmd = AgentCommands.SetupFirewall
elif re.match(regex_cmd_format.format("dst_ip=(?P[\\d.]{7,})"), arg):
firewall_metadata['dst_ip'] = re.match(regex_cmd_format.format("dst_ip=(?P[\\d.]{7,})"), arg).group(
'dst_ip')
elif re.match(regex_cmd_format.format("uid=(?P[\\d]+)"), arg):
firewall_metadata['uid'] = re.match(regex_cmd_format.format("uid=(?P[\\d]+)"), arg).group('uid')
elif re.match(regex_cmd_format.format("(w|wait)$"), arg):
firewall_metadata['wait'] = "-w"
else:
cmd = AgentCommands.Help
break
return cmd, force, verbose, debug, conf_file_path, log_collector_full_mode, firewall_metadata
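# Illustrative example (not part of the module): because regex_cmd_format is
# "^([-/]*){0}", commands are accepted with a '-', '--', '/' or no prefix at all.
# Each of the following selects the version command:
#
#     parse_args(["-version"])
#     parse_args(["--version"])
#     parse_args(["version"])
#
# and returns ("version", False, False, False, None, False, firewall_metadata).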
def version():
"""
Show agent version
"""
print(("{0} running on {1} {2}".format(AGENT_LONG_VERSION,
DISTRO_NAME,
DISTRO_VERSION)))
print("Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR,
PY_VERSION_MINOR,
PY_VERSION_MICRO))
print("Goal state agent: {0}".format(GOAL_STATE_AGENT_VERSION))
def usage():
"""
Return agent usage message
"""
s = "\n"
s += ("usage: {0} [-verbose] [-force] [-help] "
"-configuration-path:"
"-deprovision[+user]|-register-service|-version|-daemon|-start|"
"-run-exthandlers|-show-configuration|-collect-logs [-full]|-setup-firewall [-dst_ip= -uid= [-w/--wait]]"
"").format(sys.argv[0])
s += "\n"
return s
def start(conf_file_path=None):
"""
Start agent daemon in a background process and set stdout/stderr to
/dev/null
"""
args = [sys.argv[0], '-daemon']
if conf_file_path is not None:
args.append('-configuration-path:{0}'.format(conf_file_path))
with open(os.devnull, 'w') as devnull:
subprocess.Popen(args, stdout=devnull, stderr=devnull)
if __name__ == '__main__' :
main()
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/AgentGlobals.py
# Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
class AgentGlobals(object):
"""
This class is used for setting AgentGlobals which can be used all throughout the Agent.
"""
GUID_ZERO = "00000000-0000-0000-0000-000000000000"
#
# Some modules (e.g. telemetry) require an up-to-date container ID. We update this variable each time we
# fetch the goal state.
#
_container_id = GUID_ZERO
@staticmethod
def get_container_id():
return AgentGlobals._container_id
@staticmethod
def update_container_id(container_id):
AgentGlobals._container_id = container_id
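# Illustrative usage (not part of the module): consumers read the current
# container ID with AgentGlobals.get_container_id(), and the goal-state code
# refreshes it on each fetch via AgentGlobals.update_container_id(new_id).
# Before the first goal state is fetched, get_container_id() returns GUID_ZERO.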
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/__init__.py
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/agent_supported_feature.py
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.common import conf
class SupportedFeatureNames(object):
"""
    Enum defining the feature names for all features that the agent supports
"""
MultiConfig = "MultipleExtensionsPerHandler"
ExtensionTelemetryPipeline = "ExtensionTelemetryPipeline"
FastTrack = "FastTrack"
GAVersioningGovernance = "VersioningGovernance" # Guest Agent Versioning
class AgentSupportedFeature(object):
"""
    Interface for defining all features that the Linux Guest Agent supports, and for reporting whether each is supported back to CRP
"""
def __init__(self, name, version="1.0", supported=False):
self.__name = name
self.__version = version
self.__supported = supported
@property
def name(self):
return self.__name
@property
def version(self):
return self.__version
@property
def is_supported(self):
return self.__supported
class _MultiConfigFeature(AgentSupportedFeature):
__NAME = SupportedFeatureNames.MultiConfig
__VERSION = "1.0"
__SUPPORTED = True
def __init__(self):
super(_MultiConfigFeature, self).__init__(name=_MultiConfigFeature.__NAME,
version=_MultiConfigFeature.__VERSION,
supported=_MultiConfigFeature.__SUPPORTED)
class _ETPFeature(AgentSupportedFeature):
__NAME = SupportedFeatureNames.ExtensionTelemetryPipeline
__VERSION = "1.0"
__SUPPORTED = True
def __init__(self):
super(_ETPFeature, self).__init__(name=self.__NAME,
version=self.__VERSION,
supported=self.__SUPPORTED)
class _GAVersioningGovernanceFeature(AgentSupportedFeature):
"""
    CRP drives the RSM update only if the agent reports, via this flag, that it supports RSM upgrades; otherwise CRP falls back to the largest available version.
    The agent does not report the feature flag if auto-update is disabled, or if an old agent version that does not understand GA versioning is running.
    Note: Windows in particular needs this flag to report to CRP that the GA does not support the updates, so Linux adopted the same flag for a common solution.
"""
__NAME = SupportedFeatureNames.GAVersioningGovernance
__VERSION = "1.0"
__SUPPORTED = conf.get_auto_update_to_latest_version()
def __init__(self):
super(_GAVersioningGovernanceFeature, self).__init__(name=self.__NAME,
version=self.__VERSION,
supported=self.__SUPPORTED)
# This is the list of features that Agent supports and we advertise to CRP
__CRP_ADVERTISED_FEATURES = {
SupportedFeatureNames.MultiConfig: _MultiConfigFeature(),
SupportedFeatureNames.GAVersioningGovernance: _GAVersioningGovernanceFeature()
}
# This is the list of features that Agent supports and we advertise to Extensions
__EXTENSION_ADVERTISED_FEATURES = {
SupportedFeatureNames.ExtensionTelemetryPipeline: _ETPFeature()
}
def get_supported_feature_by_name(feature_name):
if feature_name in __CRP_ADVERTISED_FEATURES:
return __CRP_ADVERTISED_FEATURES[feature_name]
if feature_name in __EXTENSION_ADVERTISED_FEATURES:
return __EXTENSION_ADVERTISED_FEATURES[feature_name]
raise NotImplementedError("Feature with Name: {0} not found".format(feature_name))
def get_agent_supported_features_list_for_crp():
"""
List of features that the GuestAgent currently supports (like FastTrack, MultiConfig, etc).
We need to send this list as part of Status reporting to inform CRP of all the features the agent supports.
:return: Dict containing all CRP supported features with the key as their names and the AgentFeature object as
the value if they are supported by the Agent
Eg: {
MultipleExtensionsPerHandler: _MultiConfigFeature()
}
"""
return dict((name, feature) for name, feature in __CRP_ADVERTISED_FEATURES.items() if feature.is_supported)
def get_agent_supported_features_list_for_extensions():
"""
List of features that the GuestAgent currently supports (like Extension Telemetry Pipeline, etc) needed by Extensions.
We need to send this list as environment variables when calling extension commands to inform Extensions of all the
features the agent supports.
:return: Dict containing all Extension supported features with the key as their names and the AgentFeature object as
the value if the feature is supported by the Agent.
Eg: {
CRPSupportedFeatureNames.ExtensionTelemetryPipeline: _ETPFeature()
}
"""
return dict((name, feature) for name, feature in __EXTENSION_ADVERTISED_FEATURES.items() if feature.is_supported)
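# Illustrative usage (not part of the module): looking up a feature by name and
# checking whether it is advertised; report() below is a hypothetical helper:
#
#     feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig)
#     if feature.is_supported:
#         report(feature.name, feature.version)
#
# get_supported_feature_by_name raises NotImplementedError for unknown names.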
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/conf.py
# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
"""
Module conf loads and parses configuration file
""" # pylint: disable=W0105
import os
import os.path
from azurelinuxagent.common.utils.fileutil import read_file #pylint: disable=R0401
from azurelinuxagent.common.exception import AgentConfigError
DISABLE_AGENT_FILE = 'disable_agent'
class ConfigurationProvider(object):
"""
Parse and store key:values in /etc/waagent.conf.
"""
def __init__(self):
self.values = dict()
def load(self, content):
if not content:
raise AgentConfigError("Can't not parse empty configuration")
for line in content.split('\n'):
if not line.startswith("#") and "=" in line:
parts = line.split('=', 1)
if len(parts) < 2:
continue
key = parts[0].strip()
value = parts[1].split('#')[0].strip("\" ").strip()
self.values[key] = value if value != "None" else None
@staticmethod
def _get_default(default):
if hasattr(default, '__call__'):
return default()
return default
def get(self, key, default_value):
"""
Retrieves a string parameter by key and returns its value. If not found returns the default value,
or if the default value is a callable returns the result of invoking the callable.
"""
val = self.values.get(key)
return val if val is not None else self._get_default(default_value)
def get_switch(self, key, default_value):
"""
Retrieves a switch parameter by key and returns its value as a boolean. If not found returns the default value,
or if the default value is a callable returns the result of invoking the callable.
"""
val = self.values.get(key)
if val is not None and val.lower() == 'y':
return True
elif val is not None and val.lower() == 'n':
return False
return self._get_default(default_value)
def get_int(self, key, default_value):
"""
Retrieves an int parameter by key and returns its value. If not found returns the default value,
or if the default value is a callable returns the result of invoking the callable.
"""
        try:
            return int(self.values.get(key))
        except (TypeError, ValueError):
            return self._get_default(default_value)
def is_present(self, key):
"""
Returns True if the given flag present in the configuration file, False otherwise.
"""
return self.values.get(key) is not None
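# Illustrative example (not part of the module): ConfigurationProvider parses
# simple "Key=Value" lines, ignoring comment lines, inline '#' fragments, and
# surrounding quotes:
#
#     provider = ConfigurationProvider()
#     provider.load('Logs.Verbose=y # enable verbose logs\nLib.Dir="/var/lib/waagent"')
#     provider.get_switch("Logs.Verbose", False)   # -> True
#     provider.get("Lib.Dir", None)                # -> '/var/lib/waagent'
#     provider.get_int("HttpProxy.Port", None)     # -> None (key not present)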
__conf__ = ConfigurationProvider()
def load_conf_from_file(conf_file_path, conf=__conf__):
"""
Load conf file from: conf_file_path
"""
    if not os.path.isfile(conf_file_path):
raise AgentConfigError(("Missing configuration in {0}"
"").format(conf_file_path))
try:
content = read_file(conf_file_path)
conf.load(content)
except IOError as err:
raise AgentConfigError(("Failed to load conf file:{0}, {1}"
"").format(conf_file_path, err))
__SWITCH_OPTIONS__ = {
"OS.AllowHTTP": False,
"OS.EnableFirewall": False,
"OS.EnableFIPS": False,
"OS.EnableRDMA": False,
"OS.UpdateRdmaDriver": False,
"OS.CheckRdmaDriver": False,
"Logs.Verbose": False,
"Logs.Console": True,
"Logs.Collect": True,
"Extensions.Enabled": True,
"Extensions.WaitForCloudInit": False,
"Provisioning.AllowResetSysUser": False,
"Provisioning.RegenerateSshHostKeyPair": False,
"Provisioning.DeleteRootPassword": False,
"Provisioning.DecodeCustomData": False,
"Provisioning.ExecuteCustomData": False,
"Provisioning.MonitorHostName": False,
"DetectScvmmEnv": False,
"ResourceDisk.Format": False,
"ResourceDisk.EnableSwap": False,
"ResourceDisk.EnableSwapEncryption": False,
"AutoUpdate.Enabled": True,
"AutoUpdate.UpdateToLatestVersion": True,
"EnableOverProvisioning": True,
#
# "Debug" options are experimental and may be removed in later
# versions of the Agent.
#
"Debug.CgroupLogMetrics": False,
"Debug.CgroupDisableOnProcessCheckFailure": True,
"Debug.CgroupDisableOnQuotaCheckFailure": True,
"Debug.EnableAgentMemoryUsageCheck": False,
"Debug.EnableFastTrack": True,
"Debug.EnableGAVersioning": True
}
__STRING_OPTIONS__ = {
"Lib.Dir": "/var/lib/waagent",
"DVD.MountPoint": "/mnt/cdrom/secure",
"Pid.File": "/var/run/waagent.pid",
"Extension.LogDir": "/var/log/azure",
"OS.OpensslPath": "/usr/bin/openssl",
"OS.SshDir": "/etc/ssh",
"OS.HomeDir": "/home",
"OS.PasswordPath": "/etc/shadow",
"OS.SudoersDir": "/etc/sudoers.d",
"OS.RootDeviceScsiTimeout": None,
"Provisioning.Agent": "auto",
"Provisioning.SshHostKeyPairType": "rsa",
"Provisioning.PasswordCryptId": "6",
"HttpProxy.Host": None,
"ResourceDisk.MountPoint": "/mnt/resource",
"ResourceDisk.MountOptions": None,
"ResourceDisk.Filesystem": "ext3",
"AutoUpdate.GAFamily": "Prod",
"Debug.CgroupMonitorExpiryTime": "2022-03-31",
"Debug.CgroupMonitorExtensionName": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent",
}
__INTEGER_OPTIONS__ = {
"Extensions.GoalStatePeriod": 6,
"Extensions.InitialGoalStatePeriod": 6,
"Extensions.WaitForCloudInitTimeout": 3600,
"OS.EnableFirewallPeriod": 300,
"OS.RemovePersistentNetRulesPeriod": 30,
"OS.RootDeviceScsiTimeoutPeriod": 30,
"OS.MonitorDhcpClientRestartPeriod": 30,
"OS.SshClientAliveInterval": 180,
"Provisioning.MonitorHostNamePeriod": 30,
"Provisioning.PasswordCryptSaltLength": 10,
"HttpProxy.Port": None,
"ResourceDisk.SwapSizeMB": 0,
"Autoupdate.Frequency": 3600,
"Logs.CollectPeriod": 3600,
#
# "Debug" options are experimental and may be removed in later
# versions of the Agent.
#
"Debug.CgroupCheckPeriod": 300,
"Debug.AgentCpuQuota": 50,
"Debug.AgentCpuThrottledTimeThreshold": 120,
"Debug.AgentMemoryQuota": 30 * 1024 ** 2,
"Debug.EtpCollectionPeriod": 300,
"Debug.AutoUpdateHotfixFrequency": 14400,
"Debug.AutoUpdateNormalFrequency": 86400,
"Debug.FirewallRulesLogPeriod": 86400
}
def get_configuration(conf=__conf__):
options = {}
for option in __SWITCH_OPTIONS__:
options[option] = conf.get_switch(option, __SWITCH_OPTIONS__[option])
for option in __STRING_OPTIONS__:
options[option] = conf.get(option, __STRING_OPTIONS__[option])
for option in __INTEGER_OPTIONS__:
options[option] = conf.get_int(option, __INTEGER_OPTIONS__[option])
return options
def get_default_value(option):
if option in __STRING_OPTIONS__:
return __STRING_OPTIONS__[option]
raise ValueError("{0} is not a valid configuration parameter.".format(option))
def get_int_default_value(option):
if option in __INTEGER_OPTIONS__:
return int(__INTEGER_OPTIONS__[option])
raise ValueError("{0} is not a valid configuration parameter.".format(option))
def get_switch_default_value(option):
if option in __SWITCH_OPTIONS__:
return __SWITCH_OPTIONS__[option]
raise ValueError("{0} is not a valid configuration parameter.".format(option))
def is_present(key, conf=__conf__):
"""
Returns True if the given flag present in the configuration file, False otherwise.
"""
return conf.is_present(key)
def enable_firewall(conf=__conf__):
return conf.get_switch("OS.EnableFirewall", False)
def get_enable_firewall_period(conf=__conf__):
return conf.get_int("OS.EnableFirewallPeriod", 300)
def get_remove_persistent_net_rules_period(conf=__conf__):
return conf.get_int("OS.RemovePersistentNetRulesPeriod", 30)
def get_monitor_dhcp_client_restart_period(conf=__conf__):
return conf.get_int("OS.MonitorDhcpClientRestartPeriod", 30)
def enable_rdma(conf=__conf__):
return conf.get_switch("OS.EnableRDMA", False) or \
conf.get_switch("OS.UpdateRdmaDriver", False) or \
conf.get_switch("OS.CheckRdmaDriver", False)
def enable_rdma_update(conf=__conf__):
return conf.get_switch("OS.UpdateRdmaDriver", False)
def enable_check_rdma_driver(conf=__conf__):
return conf.get_switch("OS.CheckRdmaDriver", True)
def get_logs_verbose(conf=__conf__):
return conf.get_switch("Logs.Verbose", False)
def get_logs_console(conf=__conf__):
return conf.get_switch("Logs.Console", True)
def get_collect_logs(conf=__conf__):
return conf.get_switch("Logs.Collect", True)
def get_collect_logs_period(conf=__conf__):
return conf.get_int("Logs.CollectPeriod", 3600)
def get_lib_dir(conf=__conf__):
return conf.get("Lib.Dir", "/var/lib/waagent")
def get_published_hostname(conf=__conf__):
# Some applications rely on this file; do not remove this setting
return os.path.join(get_lib_dir(conf), 'published_hostname')
def get_dvd_mount_point(conf=__conf__):
return conf.get("DVD.MountPoint", "/mnt/cdrom/secure")
def get_agent_pid_file_path(conf=__conf__):
return conf.get("Pid.File", "/var/run/waagent.pid")
def get_ext_log_dir(conf=__conf__):
return conf.get("Extension.LogDir", "/var/log/azure")
def get_agent_log_file():
return "/var/log/waagent.log"
def get_fips_enabled(conf=__conf__):
return conf.get_switch("OS.EnableFIPS", False)
def get_openssl_cmd(conf=__conf__):
return conf.get("OS.OpensslPath", "/usr/bin/openssl")
def get_ssh_client_alive_interval(conf=__conf__):
return conf.get("OS.SshClientAliveInterval", 180)
def get_ssh_dir(conf=__conf__):
return conf.get("OS.SshDir", "/etc/ssh")
def get_home_dir(conf=__conf__):
return conf.get("OS.HomeDir", "/home")
def get_passwd_file_path(conf=__conf__):
return conf.get("OS.PasswordPath", "/etc/shadow")
def get_sudoers_dir(conf=__conf__):
return conf.get("OS.SudoersDir", "/etc/sudoers.d")
def get_sshd_conf_file_path(conf=__conf__):
return os.path.join(get_ssh_dir(conf), "sshd_config")
def get_ssh_key_glob(conf=__conf__):
return os.path.join(get_ssh_dir(conf), 'ssh_host_*key*')
def get_ssh_key_private_path(conf=__conf__):
return os.path.join(get_ssh_dir(conf),
'ssh_host_{0}_key'.format(get_ssh_host_keypair_type(conf)))
def get_ssh_key_public_path(conf=__conf__):
return os.path.join(get_ssh_dir(conf),
'ssh_host_{0}_key.pub'.format(get_ssh_host_keypair_type(conf)))
def get_root_device_scsi_timeout(conf=__conf__):
return conf.get("OS.RootDeviceScsiTimeout", None)
def get_root_device_scsi_timeout_period(conf=__conf__):
return conf.get_int("OS.RootDeviceScsiTimeoutPeriod", 30)
def get_ssh_host_keypair_type(conf=__conf__):
keypair_type = conf.get("Provisioning.SshHostKeyPairType", "rsa")
if keypair_type == "auto":
'''
auto generates all supported key types and returns the
rsa thumbprint as the default.
'''
return "rsa"
return keypair_type
def get_ssh_host_keypair_mode(conf=__conf__):
return conf.get("Provisioning.SshHostKeyPairType", "rsa")
def get_extensions_enabled(conf=__conf__):
return conf.get_switch("Extensions.Enabled", True)
def get_wait_for_cloud_init(conf=__conf__):
return conf.get_switch("Extensions.WaitForCloudInit", False)
def get_wait_for_cloud_init_timeout(conf=__conf__):
return conf.get_switch("Extensions.WaitForCloudInitTimeout", 3600)
def get_goal_state_period(conf=__conf__):
return conf.get_int("Extensions.GoalStatePeriod", 6)
def get_initial_goal_state_period(conf=__conf__):
return conf.get_int("Extensions.InitialGoalStatePeriod", default_value=lambda: get_goal_state_period(conf=conf))
def get_allow_reset_sys_user(conf=__conf__):
return conf.get_switch("Provisioning.AllowResetSysUser", False)
def get_regenerate_ssh_host_key(conf=__conf__):
return conf.get_switch("Provisioning.RegenerateSshHostKeyPair", False)
def get_delete_root_password(conf=__conf__):
return conf.get_switch("Provisioning.DeleteRootPassword", False)
def get_decode_customdata(conf=__conf__):
return conf.get_switch("Provisioning.DecodeCustomData", False)
def get_execute_customdata(conf=__conf__):
return conf.get_switch("Provisioning.ExecuteCustomData", False)
def get_password_cryptid(conf=__conf__):
return conf.get("Provisioning.PasswordCryptId", "6")
def get_provisioning_agent(conf=__conf__):
return conf.get("Provisioning.Agent", "auto")
def get_provision_enabled(conf=__conf__):
"""
Provisioning (as far as waagent is concerned) is enabled if either the
agent is set to 'auto' or 'waagent'. This wraps logic that was introduced
    for flexible provisioning agent configuration and detection. It replaces
the older bool setting to turn provisioning on or off.
"""
return get_provisioning_agent(conf) in ("auto", "waagent")
def get_password_crypt_salt_len(conf=__conf__):
return conf.get_int("Provisioning.PasswordCryptSaltLength", 10)
def get_monitor_hostname(conf=__conf__):
return conf.get_switch("Provisioning.MonitorHostName", False)
def get_monitor_hostname_period(conf=__conf__):
return conf.get_int("Provisioning.MonitorHostNamePeriod", 30)
def get_httpproxy_host(conf=__conf__):
return conf.get("HttpProxy.Host", None)
def get_httpproxy_port(conf=__conf__):
return conf.get_int("HttpProxy.Port", None)
def get_detect_scvmm_env(conf=__conf__):
return conf.get_switch("DetectScvmmEnv", False)
def get_resourcedisk_format(conf=__conf__):
return conf.get_switch("ResourceDisk.Format", False)
def get_resourcedisk_enable_swap(conf=__conf__):
return conf.get_switch("ResourceDisk.EnableSwap", False)
def get_resourcedisk_enable_swap_encryption(conf=__conf__):
return conf.get_switch("ResourceDisk.EnableSwapEncryption", False)
def get_resourcedisk_mountpoint(conf=__conf__):
return conf.get("ResourceDisk.MountPoint", "/mnt/resource")
def get_resourcedisk_mountoptions(conf=__conf__):
return conf.get("ResourceDisk.MountOptions", None)
def get_resourcedisk_filesystem(conf=__conf__):
return conf.get("ResourceDisk.Filesystem", "ext3")
def get_resourcedisk_swap_size_mb(conf=__conf__):
return conf.get_int("ResourceDisk.SwapSizeMB", 0)
def get_autoupdate_gafamily(conf=__conf__):
return conf.get("AutoUpdate.GAFamily", "Prod")
def get_autoupdate_enabled(conf=__conf__):
return conf.get_switch("AutoUpdate.Enabled", True)
def get_autoupdate_frequency(conf=__conf__):
return conf.get_int("Autoupdate.Frequency", 3600)
def get_enable_overprovisioning(conf=__conf__):
return conf.get_switch("EnableOverProvisioning", True)
def get_allow_http(conf=__conf__):
return conf.get_switch("OS.AllowHTTP", False)
def get_disable_agent_file_path(conf=__conf__):
return os.path.join(get_lib_dir(conf), DISABLE_AGENT_FILE)
def get_cgroups_enabled(conf=__conf__):
return conf.get_switch("CGroups.Enabled", True)
def get_monitor_network_configuration_changes(conf=__conf__):
return conf.get_switch("Monitor.NetworkConfigurationChanges", False)
def get_auto_update_to_latest_version(conf=__conf__):
"""
    If set to True, the agent updates to the latest available version.
    NOTE:
    When both flags are turned on, AutoUpdate.Enabled and AutoUpdate.UpdateToLatestVersion mean the same thing: update to the latest version.
    When turned off, AutoUpdate.Enabled reverts to the pre-installed agent, while AutoUpdate.UpdateToLatestVersion uses the latest version already installed on the VM and does not download new agents.
    Although AutoUpdate.Enabled is being deprecated, it must still be honored when users explicitly set it instead of the new flag:
    If AutoUpdate.UpdateToLatestVersion is present, it overrides any value set for AutoUpdate.Enabled.
    If AutoUpdate.UpdateToLatestVersion is not present but AutoUpdate.Enabled is present and set to 'n', the AutoUpdate.Enabled behavior applies.
    If neither is present, the default is True.
"""
default = get_autoupdate_enabled(conf=conf)
return conf.get_switch("AutoUpdate.UpdateToLatestVersion", default)
def get_cgroup_check_period(conf=__conf__):
"""
How often to perform checks on cgroups (are the processes in the cgroups as expected,
has the agent exceeded its quota, etc)
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.CgroupCheckPeriod", 300)
def get_cgroup_log_metrics(conf=__conf__):
"""
If True, resource usage metrics are written to the local log
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_switch("Debug.CgroupLogMetrics", False)
def get_cgroup_disable_on_process_check_failure(conf=__conf__):
"""
If True, cgroups will be disabled if the process check fails
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_switch("Debug.CgroupDisableOnProcessCheckFailure", True)
def get_cgroup_disable_on_quota_check_failure(conf=__conf__):
"""
If True, cgroups will be disabled if the CPU quota check fails
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_switch("Debug.CgroupDisableOnQuotaCheckFailure", True)
def get_agent_cpu_quota(conf=__conf__):
"""
CPU quota for the agent as a percentage of 1 CPU (100% == 1 CPU)
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.AgentCpuQuota", 50)
def get_agent_cpu_throttled_time_threshold(conf=__conf__):
"""
Throttled time threshold for agent cpu in seconds.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.AgentCpuThrottledTimeThreshold", 120)
def get_agent_memory_quota(conf=__conf__):
"""
Memory quota for the agent in bytes.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.AgentMemoryQuota", 30 * 1024 ** 2)
def get_enable_agent_memory_usage_check(conf=__conf__):
"""
    If True, the agent checks its memory usage.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_switch("Debug.EnableAgentMemoryUsageCheck", False)
def get_cgroup_monitor_expiry_time(conf=__conf__):
"""
    Cgroup monitoring for pilot extensions is disabled after the expiry time.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get("Debug.CgroupMonitorExpiryTime", "2022-03-31")
def get_cgroup_monitor_extension_name(conf=__conf__):
"""
cgroups monitoring extension name
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get("Debug.CgroupMonitorExtensionName", "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent")
def get_enable_fast_track(conf=__conf__):
"""
    If True, the agent uses FastTrack when retrieving goal states.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_switch("Debug.EnableFastTrack", True)
def get_etp_collection_period(conf=__conf__):
"""
Determines the frequency to perform ETP collection on extensions telemetry events.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.EtpCollectionPeriod", 300)
def get_self_update_hotfix_frequency(conf=__conf__):
"""
    Determines the frequency to check for hotfix upgrades (only the patch/build version changes in the new upgrade).
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.SelfUpdateHotfixFrequency", 4 * 60 * 60)
def get_self_update_regular_frequency(conf=__conf__):
"""
    Determines the frequency to check for regular upgrades (the major.minor version changes in the new upgrade).
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.SelfUpdateRegularFrequency", 24 * 60 * 60)
def get_enable_ga_versioning(conf=__conf__):
"""
    If True, the agent looks for RSM updates (checking the requested version in the goal state); otherwise it falls back to self-update and picks the highest version from PIR.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_switch("Debug.EnableGAVersioning", False)
def get_firewall_rules_log_period(conf=__conf__):
"""
Determine the frequency to perform the periodic operation of logging firewall rules.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.FirewallRulesLogPeriod", 86400)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/datacontract.py
# Microsoft Azure Linux Agent
#
# Copyright 2019 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.common.exception import ProtocolError
import azurelinuxagent.common.logger as logger
# pylint: disable=W0105
"""
Base class for data contracts between guest and host and utilities to manipulate the properties in those contracts
"""
# pylint: enable=W0105
class DataContract(object):
pass
class DataContractList(list):
def __init__(self, item_cls): # pylint: disable=W0231
self.item_cls = item_cls
def validate_param(name, val, expected_type):
if val is None:
raise ProtocolError("{0} is None".format(name))
if not isinstance(val, expected_type):
raise ProtocolError(("{0} type should be {1} not {2}"
"").format(name, expected_type, type(val)))
def set_properties(name, obj, data):
if isinstance(obj, DataContract):
validate_param("Property '{0}'".format(name), data, dict)
for prob_name, prob_val in data.items():
prob_full_name = "{0}.{1}".format(name, prob_name)
try:
prob = getattr(obj, prob_name)
except AttributeError:
logger.warn("Unknown property: {0}", prob_full_name)
continue
prob = set_properties(prob_full_name, prob, prob_val)
setattr(obj, prob_name, prob)
return obj
elif isinstance(obj, DataContractList):
validate_param("List '{0}'".format(name), data, list)
for item_data in data:
item = obj.item_cls()
item = set_properties(name, item, item_data)
obj.append(item)
return obj
else:
return data
def get_properties(obj):
if isinstance(obj, DataContract):
data = {}
props = vars(obj)
for prob_name, prob in list(props.items()):
data[prob_name] = get_properties(prob)
return data
elif isinstance(obj, DataContractList):
data = []
for item in obj:
item_data = get_properties(item)
data.append(item_data)
return data
else:
return obj
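# Illustrative example (not part of the module): set_properties/get_properties
# map between plain dicts and DataContract objects by attribute name. VMInfo
# below is a hypothetical contract:
#
#     class VMInfo(DataContract):
#         def __init__(self):
#             self.name = None
#             self.size = None
#
#     vm = set_properties("vminfo", VMInfo(), {"name": "test-vm", "size": "Standard_D2"})
#     get_properties(vm)   # -> {'name': 'test-vm', 'size': 'Standard_D2'}
#
# Unknown keys in the input dict are logged and skipped rather than failing.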
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/dhcp.py
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import array
import os
import socket
import time
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.exception import DhcpError
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils.restutil import KNOWN_WIRESERVER_IP
from azurelinuxagent.common.utils.textutil import hex_dump, hex_dump2, \
hex_dump3, \
compare_bytes, str_to_ord, \
unpack_big_endian, \
int_to_ip4_addr
# the kernel routing table representation of 168.63.129.16
KNOWN_WIRESERVER_IP_ENTRY = '10813FA8'
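# (Addresses in /proc/net/route are stored as little-endian hex: 168.63.129.16
#  is the byte sequence A8 3F 81 10, which appears reversed as '10813FA8'.)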
def get_dhcp_handler():
return DhcpHandler()
class DhcpHandler(object):
"""
    Azure uses DHCP option 245 to pass the wire server endpoint IP to VMs.
"""
def __init__(self):
self.osutil = get_osutil()
self.endpoint = None
self.gateway = None
self.routes = None
self._request_broadcast = False
self.skip_cache = False
def run(self):
"""
Send dhcp request
Configure default gateway and routes
Save wire server endpoint if found
"""
if self.wireserver_route_exists or self.dhcp_cache_exists:
return
self.send_dhcp_req()
self.conf_routes()
def wait_for_network(self):
"""
Wait for network stack to be initialized.
"""
ipv4 = self.osutil.get_ip4_addr()
while ipv4 == '' or ipv4 == '0.0.0.0':
logger.info("Waiting for network.")
time.sleep(10)
logger.info("Try to start network interface.")
self.osutil.start_network()
ipv4 = self.osutil.get_ip4_addr()
@property
def wireserver_route_exists(self):
"""
Determine whether a route to the known wireserver
ip already exists, and if so use that as the endpoint.
This is true when running in a virtual network.
:return: True if a route to KNOWN_WIRESERVER_IP exists.
"""
route_exists = False
logger.info("Test for route to {0}".format(KNOWN_WIRESERVER_IP))
try:
route_table = self.osutil.read_route_table()
if any((KNOWN_WIRESERVER_IP_ENTRY in route) for route in route_table):
# reset self.gateway and self.routes
# we do not need to alter the routing table
self.endpoint = KNOWN_WIRESERVER_IP
self.gateway = None
self.routes = None
route_exists = True
logger.info("Route to {0} exists".format(KNOWN_WIRESERVER_IP))
else:
logger.warn("No route exists to {0}".format(KNOWN_WIRESERVER_IP))
except Exception as e:
logger.error(
"Could not determine whether route exists to {0}: {1}".format(
KNOWN_WIRESERVER_IP, e))
return route_exists
@property
def dhcp_cache_exists(self):
"""
Check whether the dhcp options cache exists and contains the
wireserver endpoint, unless skip_cache is True.
:return: True if the cached endpoint was found in the dhcp lease
"""
if self.skip_cache:
return False
exists = False
logger.info("Checking for dhcp lease cache")
cached_endpoint = self.osutil.get_dhcp_lease_endpoint() # pylint: disable=E1128
if cached_endpoint is not None:
self.endpoint = cached_endpoint
exists = True
logger.info("Cache exists [{0}]".format(exists))
return exists
def conf_routes(self):
logger.info("Configure routes")
logger.info("Gateway:{0}", self.gateway)
logger.info("Routes:{0}", self.routes)
# Add default gateway
if self.gateway is not None and self.osutil.is_missing_default_route():
self.osutil.route_add(0, 0, self.gateway)
if self.routes is not None:
for route in self.routes:
self.osutil.route_add(route[0], route[1], route[2])
def _send_dhcp_req(self, request):
__waiting_duration__ = [0, 10, 30, 60, 60]
for duration in __waiting_duration__:
try:
self.osutil.allow_dhcp_broadcast()
response = socket_send(request)
validate_dhcp_resp(request, response)
return response
except DhcpError as e:
logger.warn("Failed to send DHCP request: {0}", e)
time.sleep(duration)
return None
def send_dhcp_req(self):
"""
Check if DHCP is available
"""
dhcp_available = self.osutil.is_dhcp_available()
if not dhcp_available:
logger.info("send_dhcp_req: DHCP not available")
self.endpoint = KNOWN_WIRESERVER_IP
return
# pylint: disable=W0105
"""
Build dhcp request with mac addr
Configure route to allow dhcp traffic
Stop dhcp service if necessary
"""
# pylint: enable=W0105
logger.info("Send dhcp request")
mac_addr = self.osutil.get_mac_addr()
# Do unicast first, then fallback to broadcast if fails.
req = build_dhcp_request(mac_addr, self._request_broadcast)
if not self._request_broadcast:
self._request_broadcast = True
# Temporary allow broadcast for dhcp. Remove the route when done.
missing_default_route = self.osutil.is_missing_default_route()
ifname = self.osutil.get_if_name()
if missing_default_route:
self.osutil.set_route_for_dhcp_broadcast(ifname)
# In some distros, dhcp service needs to be shutdown before agent probe
# endpoint through dhcp.
if self.osutil.is_dhcp_enabled():
self.osutil.stop_dhcp_service()
resp = self._send_dhcp_req(req)
if self.osutil.is_dhcp_enabled():
self.osutil.start_dhcp_service()
if missing_default_route:
self.osutil.remove_route_for_dhcp_broadcast(ifname)
if resp is None:
raise DhcpError("Failed to receive dhcp response.")
self.endpoint, self.gateway, self.routes = parse_dhcp_resp(resp)
def validate_dhcp_resp(request, response): # pylint: disable=R1710
bytes_recv = len(response)
if bytes_recv < 0xF6:
logger.error("HandleDhcpResponse: Too few bytes received:{0}",
bytes_recv)
return False
logger.verbose("BytesReceived:{0}", hex(bytes_recv))
logger.verbose("DHCP response:{0}", hex_dump(response, bytes_recv))
    # Check the transactionId, cookie, and MAC address. The cookie should never
    # mismatch; the transactionId and MAC address may mismatch if we see a
    # response meant for another machine.
if not compare_bytes(request, response, 0xEC, 4):
logger.verbose("Cookie not match:\nsend={0},\nreceive={1}",
hex_dump3(request, 0xEC, 4),
hex_dump3(response, 0xEC, 4))
raise DhcpError("Cookie in dhcp respones doesn't match the request")
if not compare_bytes(request, response, 4, 4):
logger.verbose("TransactionID not match:\nsend={0},\nreceive={1}",
hex_dump3(request, 4, 4),
hex_dump3(response, 4, 4))
raise DhcpError("TransactionID in dhcp respones "
"doesn't match the request")
if not compare_bytes(request, response, 0x1C, 6):
logger.verbose("Mac Address not match:\nsend={0},\nreceive={1}",
hex_dump3(request, 0x1C, 6),
hex_dump3(response, 0x1C, 6))
raise DhcpError("Mac Addr in dhcp respones "
"doesn't match the request")
def parse_route(response, option, i, length, bytes_recv): # pylint: disable=W0613
# http://msdn.microsoft.com/en-us/library/cc227282%28PROT.10%29.aspx
logger.verbose("Routes at offset: {0} with length:{1}", hex(i),
hex(length))
routes = []
if length < 5:
logger.error("Data too small for option:{0}", option)
j = i + 2
while j < (i + length + 2):
mask_len_bits = str_to_ord(response[j])
mask_len_bytes = (((mask_len_bits + 7) & ~7) >> 3)
mask = 0xFFFFFFFF & (0xFFFFFFFF << (32 - mask_len_bits))
j += 1
net = unpack_big_endian(response, j, mask_len_bytes)
net <<= (32 - mask_len_bytes * 8)
net &= mask
j += mask_len_bytes
gateway = unpack_big_endian(response, j, 4)
j += 4
routes.append((net, mask, gateway))
if j != (i + length + 2):
logger.error("Unable to parse routes")
return routes
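# Worked example (illustrative): a classless route entry for 10.0.0.0/8 via
# 10.0.0.1 is encoded in option 249 as the bytes 08 0A 0A 00 00 01:
#   mask_len_bits = 8, so mask_len_bytes = 1 and mask = 0xFF000000;
#   net = 0x0A << 24 = 0x0A000000 (10.0.0.0); gateway = 0x0A000001 (10.0.0.1);
# parse_route therefore appends (0x0A000000, 0xFF000000, 0x0A000001).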
def parse_ip_addr(response, option, i, length, bytes_recv):
if i + 5 < bytes_recv:
if length != 4:
logger.error("Endpoint or Default Gateway not 4 bytes")
return None
addr = unpack_big_endian(response, i + 2, 4)
ip_addr = int_to_ip4_addr(addr)
return ip_addr
else:
logger.error("Data too small for option:{0}", option)
return None
def parse_dhcp_resp(response):
"""
Parse DHCP response:
Returns endpoint server or None on error.
"""
logger.verbose("parse Dhcp Response")
bytes_recv = len(response)
endpoint = None
gateway = None
routes = None
# Walk all the returned options, parsing out what we need, ignoring the
    # others. We need the custom option 245 to find the endpoint we talk to
# as well as to handle some Linux DHCP client incompatibilities;
# options 3 for default gateway and 249 for routes; 255 is end.
i = 0xF0 # offset to first option
while i < bytes_recv:
option = str_to_ord(response[i])
length = 0
if (i + 1) < bytes_recv:
length = str_to_ord(response[i + 1])
logger.verbose("DHCP option {0} at offset:{1} with length:{2}",
hex(option), hex(i), hex(length))
if option == 255:
logger.verbose("DHCP packet ended at offset:{0}", hex(i))
break
elif option == 249:
routes = parse_route(response, option, i, length, bytes_recv)
elif option == 3:
gateway = parse_ip_addr(response, option, i, length, bytes_recv)
logger.verbose("Default gateway:{0}, at {1}", gateway, hex(i))
elif option == 245:
endpoint = parse_ip_addr(response, option, i, length, bytes_recv)
logger.verbose("Azure wire protocol endpoint:{0}, at {1}",
endpoint,
hex(i))
else:
logger.verbose("Skipping DHCP option:{0} at {1} with length {2}",
hex(option), hex(i), hex(length))
i += length + 2
return endpoint, gateway, routes
def socket_send(request):
sock = None
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM,
socket.IPPROTO_UDP)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind(("0.0.0.0", 68))
sock.sendto(request, ("", 67))
sock.settimeout(10)
logger.verbose("Send DHCP request: Setting socket.timeout=10, "
"entering recv")
response = sock.recv(1024)
return response
except IOError as e:
raise DhcpError("{0}".format(e))
finally:
if sock is not None:
sock.close()
def build_dhcp_request(mac_addr, request_broadcast):
"""
Build DHCP request string.
"""
#
# typedef struct _DHCP {
# UINT8 Opcode; /* op: BOOTREQUEST or BOOTREPLY */
# UINT8 HardwareAddressType; /* htype: ethernet */
# UINT8 HardwareAddressLength; /* hlen: 6 (48 bit mac address) */
# UINT8 Hops; /* hops: 0 */
# UINT8 TransactionID[4]; /* xid: random */
# UINT8 Seconds[2]; /* secs: 0 */
# UINT8 Flags[2]; /* flags: 0 or 0x8000 for broadcast*/
# UINT8 ClientIpAddress[4]; /* ciaddr: 0 */
# UINT8 YourIpAddress[4]; /* yiaddr: 0 */
# UINT8 ServerIpAddress[4]; /* siaddr: 0 */
# UINT8 RelayAgentIpAddress[4]; /* giaddr: 0 */
# UINT8 ClientHardwareAddress[16]; /* chaddr: 6 byte eth MAC address */
# UINT8 ServerName[64]; /* sname: 0 */
# UINT8 BootFileName[128]; /* file: 0 */
# UINT8 MagicCookie[4]; /* 99 130 83 99 */
# /* 0x63 0x82 0x53 0x63 */
# /* options -- hard code ours */
#
# UINT8 MessageTypeCode; /* 53 */
# UINT8 MessageTypeLength; /* 1 */
# UINT8 MessageType; /* 1 for DISCOVER */
# UINT8 End; /* 255 */
# } DHCP;
#
# tuple of 244 zeros
# (struct.pack_into would be good here, but requires Python 2.5)
request = [0] * 244
trans_id = gen_trans_id()
# Opcode = 1
# HardwareAddressType = 1 (ethernet/MAC)
# HardwareAddressLength = 6 (ethernet/MAC/48 bits)
for a in range(0, 3):
request[a] = [1, 1, 6][a]
# fill in transaction id (random number to ensure response matches request)
for a in range(0, 4):
request[4 + a] = str_to_ord(trans_id[a])
logger.verbose("BuildDhcpRequest: transactionId:%s,%04X" % (
hex_dump2(trans_id),
unpack_big_endian(request, 4, 4)))
if request_broadcast:
        # set broadcast flag to true to request that the dhcp server
        # respond to a broadcast address; this is useful when the
        # user's dhclient fails.
request[0x0A] = 0x80
# fill in ClientHardwareAddress
for a in range(0, 6):
request[0x1C + a] = str_to_ord(mac_addr[a])
# DHCP Magic Cookie: 99, 130, 83, 99
# MessageTypeCode = 53 DHCP Message Type
# MessageTypeLength = 1
# MessageType = DHCPDISCOVER
# End = 255 DHCP_END
for a in range(0, 8):
request[0xEC + a] = [99, 130, 83, 99, 53, 1, 1, 255][a]
return array.array("B", request)
def gen_trans_id():
return os.urandom(4)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/errorstate.py
from datetime import datetime, timedelta
ERROR_STATE_DELTA_DEFAULT = timedelta(minutes=15)
ERROR_STATE_DELTA_INSTALL = timedelta(minutes=5)
ERROR_STATE_HOST_PLUGIN_FAILURE = timedelta(minutes=5)
class ErrorState(object):
def __init__(self, min_timedelta=ERROR_STATE_DELTA_DEFAULT):
self.min_timedelta = min_timedelta
self.count = 0
self.timestamp = None
def incr(self):
if self.count == 0:
self.timestamp = datetime.utcnow()
self.count += 1
def reset(self):
self.count = 0
self.timestamp = None
def is_triggered(self):
if self.timestamp is None:
return False
delta = datetime.utcnow() - self.timestamp
if delta >= self.min_timedelta:
return True
return False
@property
def fail_time(self):
if self.timestamp is None:
return 'unknown'
delta = round((datetime.utcnow() - self.timestamp).seconds / 60.0, 2)
if delta < 60:
return '{0} min'.format(delta)
delta_hr = round(delta / 60.0, 2)
return '{0} hr'.format(delta_hr)
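# Illustrative usage (not part of the module): ErrorState lets callers act only
# once a failure has persisted for the minimum period:
#
#     state = ErrorState(min_timedelta=ERROR_STATE_DELTA_DEFAULT)
#     state.incr()                # record a failure; the first call stamps the time
#     if state.is_triggered():    # True once failures have spanned >= min_timedelta
#         handle_persistent_failure()   # hypothetical handler
#         state.reset()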
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/event.py
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import atexit
import json
import os
import platform
import re
import sys
import threading
import time
import traceback
from datetime import datetime
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.exception import EventError, OSUtilError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.datacontract import get_properties, set_properties
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.telemetryevent import TelemetryEventParam, TelemetryEvent, CommonTelemetryEventSchema, \
GuestAgentGenericLogsSchema, GuestAgentExtensionEventsSchema, GuestAgentPerfCounterEventsSchema
from azurelinuxagent.common.utils import fileutil, textutil
from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, getattrib, str_to_encoded_ustr
from azurelinuxagent.common.version import CURRENT_VERSION, CURRENT_AGENT, AGENT_NAME, DISTRO_NAME, DISTRO_VERSION, DISTRO_CODE_NAME, AGENT_EXECUTION_MODE
from azurelinuxagent.common.protocol.imds import get_imds_client
EVENTS_DIRECTORY = "events"
_EVENT_MSG = "Event: name={0}, op={1}, message={2}, duration={3}"
TELEMETRY_EVENT_PROVIDER_ID = "69B669B9-4AF8-4C50-BDC4-6006FA76E975"
TELEMETRY_EVENT_EVENT_ID = 1
TELEMETRY_METRICS_EVENT_ID = 4
TELEMETRY_LOG_PROVIDER_ID = "FFF0196F-EE4C-4EAF-9AA5-776F622DEB4F"
TELEMETRY_LOG_EVENT_ID = 7
#
# When this flag is enabled the TODO comment in Logger.log() needs to be addressed; also the tests
# marked with "Enable this test when SEND_LOGS_TO_TELEMETRY is enabled" should be enabled.
#
SEND_LOGS_TO_TELEMETRY = False
MAX_NUMBER_OF_EVENTS = 1000
AGENT_EVENT_FILE_EXTENSION = '.waagent.tld'
EVENT_FILE_REGEX = re.compile(r'(?P<agent_event>\.waagent)?\.tld$')
def send_logs_to_telemetry():
return SEND_LOGS_TO_TELEMETRY
class WALAEventOperation:
ActivateResourceDisk = "ActivateResourceDisk"
AgentBlacklisted = "AgentBlacklisted"
AgentEnabled = "AgentEnabled"
AgentMemory = "AgentMemory"
AgentUpgrade = "AgentUpgrade"
ArtifactsProfileBlob = "ArtifactsProfileBlob"
CGroupsCleanUp = "CGroupsCleanUp"
CGroupsDisabled = "CGroupsDisabled"
CGroupsInfo = "CGroupsInfo"
CloudInit = "CloudInit"
CollectEventErrors = "CollectEventErrors"
CollectEventUnicodeErrors = "CollectEventUnicodeErrors"
ConfigurationChange = "ConfigurationChange"
CustomData = "CustomData"
DefaultChannelChange = "DefaultChannelChange"
Deploy = "Deploy"
Disable = "Disable"
Downgrade = "Downgrade"
Download = "Download"
Enable = "Enable"
ExtensionProcessing = "ExtensionProcessing"
ExtensionTelemetryEventProcessing = "ExtensionTelemetryEventProcessing"
FetchGoalState = "FetchGoalState"
Firewall = "Firewall"
GoalState = "GoalState"
GoalStateUnsupportedFeatures = "GoalStateUnsupportedFeatures"
HealthCheck = "HealthCheck"
HealthObservation = "HealthObservation"
HeartBeat = "HeartBeat"
HostnamePublishing = "HostnamePublishing"
HostPlugin = "HostPlugin"
HostPluginHeartbeat = "HostPluginHeartbeat"
HostPluginHeartbeatExtended = "HostPluginHeartbeatExtended"
HttpErrors = "HttpErrors"
HttpGet = "HttpGet"
ImdsHeartbeat = "ImdsHeartbeat"
Install = "Install"
InitializeHostPlugin = "InitializeHostPlugin"
Log = "Log"
LogCollection = "LogCollection"
NoExec = "NoExec"
OSInfo = "OSInfo"
OpenSsl = "OpenSsl"
Partition = "Partition"
PersistFirewallRules = "PersistFirewallRules"
ProvisionAfterExtensions = "ProvisionAfterExtensions"
PluginSettingsVersionMismatch = "PluginSettingsVersionMismatch"
InvalidExtensionConfig = "InvalidExtensionConfig"
Provision = "Provision"
ProvisionGuestAgent = "ProvisionGuestAgent"
RemoteAccessHandling = "RemoteAccessHandling"
ReportEventErrors = "ReportEventErrors"
ReportEventUnicodeErrors = "ReportEventUnicodeErrors"
ReportStatus = "ReportStatus"
ReportStatusExtended = "ReportStatusExtended"
ResetFirewall = "ResetFirewall"
Restart = "Restart"
SequenceNumberMismatch = "SequenceNumberMismatch"
SetCGroupsLimits = "SetCGroupsLimits"
SkipUpdate = "SkipUpdate"
StatusProcessing = "StatusProcessing"
UnhandledError = "UnhandledError"
UnInstall = "UnInstall"
Unknown = "Unknown"
Update = "Update"
VmSettings = "VmSettings"
VmSettingsSummary = "VmSettingsSummary"
SHOULD_ENCODE_MESSAGE_LEN = 80
SHOULD_ENCODE_MESSAGE_OP = [
WALAEventOperation.Disable,
WALAEventOperation.Enable,
WALAEventOperation.Install,
WALAEventOperation.UnInstall,
]
class EventStatus(object):
EVENT_STATUS_FILE = "event_status.json"
def __init__(self):
self._path = None
self._status = {}
def clear(self):
self._status = {}
self._save()
def event_marked(self, name, version, op):
return self._event_name(name, version, op) in self._status
def event_succeeded(self, name, version, op):
event = self._event_name(name, version, op)
if event not in self._status:
return True
return self._status[event] is True
def initialize(self, status_dir=conf.get_lib_dir()):
self._path = os.path.join(status_dir, EventStatus.EVENT_STATUS_FILE)
self._load()
def mark_event_status(self, name, version, op, status):
event = self._event_name(name, version, op)
self._status[event] = (status is True)
self._save()
def _event_name(self, name, version, op):
return "{0}-{1}-{2}".format(name, version, op)
def _load(self):
try:
self._status = {}
if os.path.isfile(self._path):
with open(self._path, 'r') as f:
self._status = json.load(f)
except Exception as e:
logger.warn("Exception occurred loading event status: {0}".format(e))
self._status = {}
def _save(self):
try:
with open(self._path, 'w') as f:
json.dump(self._status, f)
except Exception as e:
logger.warn("Exception occurred saving event status: {0}".format(e))
__event_status__ = EventStatus()
__event_status_operations__ = [
WALAEventOperation.ReportStatus
]
def parse_json_event(data_str):
data = json.loads(data_str)
event = TelemetryEvent()
set_properties("TelemetryEvent", event, data)
event.file_type = "json"
return event
def parse_event(data_str):
try:
try:
return parse_json_event(data_str)
except ValueError:
return parse_xml_event(data_str)
except Exception as e:
raise EventError("Error parsing event: {0}".format(ustr(e)))
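# Illustrative note (not part of the original module): events written by this
# agent are saved as JSON (see EventLogger.save_event), so parse_json_event
# normally succeeds; the XML path remains for events produced by older agents.
#
#     event = parse_event(file_contents)  # tries JSON first, falls back to XML
#     event.file_type                     # "json" or "xml"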
def parse_xml_param(param_node):
name = getattrib(param_node, "Name")
value_str = getattrib(param_node, "Value")
attr_type = getattrib(param_node, "T")
value = value_str
if attr_type == 'mt:uint64':
value = int(value_str)
elif attr_type == 'mt:bool':
value = bool(value_str)
elif attr_type == 'mt:float64':
value = float(value_str)
return TelemetryEventParam(name, value)
def parse_xml_event(data_str):
try:
xml_doc = parse_doc(data_str)
event_id = getattrib(find(xml_doc, "Event"), 'id')
provider_id = getattrib(find(xml_doc, "Provider"), 'id')
event = TelemetryEvent(event_id, provider_id)
param_nodes = findall(xml_doc, 'Param')
for param_node in param_nodes:
event.parameters.append(parse_xml_param(param_node))
event.file_type = "xml"
return event
except Exception as e:
raise ValueError(ustr(e))
def _encode_message(op, message):
"""
Gzip and base64 encode a message based on the operation.
The intent is to keep the log human readable while still including the
stdout/stderr from extension operations. Extension operations tend to generate
a lot of noise, which makes it difficult to parse the line-oriented waagent.log.
The compromise is to encode the stdout/stderr so we preserve the data and do
not destroy the line oriented nature.
The data can be recovered using the following command:
$ echo '<encoded message>' | base64 -d | pigz -zd
You may need to install the pigz command.
:param op: Operation, e.g. Enable or Install
:param message: Message to encode
:return: gzip'ed and base64 encoded message, or the original message
"""
if len(message) == 0:
return message
if op not in SHOULD_ENCODE_MESSAGE_OP:
return message
try:
return textutil.compress(message)
except Exception:
# If the message could not be encoded a dummy message ('<>') is returned.
# The original message was still sent via telemetry, so all is not lost.
return "<>"
def _log_event(name, op, message, duration, is_success=True):
global _EVENT_MSG # pylint: disable=W0603
if not is_success:
logger.error(_EVENT_MSG, name, op, message, duration)
else:
logger.info(_EVENT_MSG, name, op, message, duration)
class CollectOrReportEventDebugInfo(object):
"""
This class is used for capturing and reporting debug info that is captured during event collection and
reporting to wireserver.
It captures the count of unicode errors and any unexpected errors and also a subset of errors with stacks to help
with debugging any potential issues.
"""
__MAX_ERRORS_TO_REPORT = 5
OP_REPORT = "Report"
OP_COLLECT = "Collect"
def __init__(self, operation=OP_REPORT):
self.__unicode_error_count = 0
self.__unicode_errors = set()
self.__op_error_count = 0
self.__op_errors = set()
if operation == self.OP_REPORT:
self.__unicode_error_event = WALAEventOperation.ReportEventUnicodeErrors
self.__op_errors_event = WALAEventOperation.ReportEventErrors
elif operation == self.OP_COLLECT:
self.__unicode_error_event = WALAEventOperation.CollectEventUnicodeErrors
self.__op_errors_event = WALAEventOperation.CollectEventErrors
def report_debug_info(self):
def report_dropped_events_error(count, errors, operation_name):
err_msg_format = "DroppedEventsCount: {0}\nReasons (first {1} errors): {2}"
if count > 0:
add_event(op=operation_name,
message=err_msg_format.format(count, CollectOrReportEventDebugInfo.__MAX_ERRORS_TO_REPORT, ', '.join(errors)),
is_success=False)
report_dropped_events_error(self.__op_error_count, self.__op_errors, self.__op_errors_event)
report_dropped_events_error(self.__unicode_error_count, self.__unicode_errors, self.__unicode_error_event)
@staticmethod
def _update_errors_and_get_count(error_count, errors, error):
error_count += 1
if len(errors) < CollectOrReportEventDebugInfo.__MAX_ERRORS_TO_REPORT:
errors.add("{0}: {1}".format(ustr(error), traceback.format_exc()))
return error_count
def update_unicode_error(self, unicode_err):
self.__unicode_error_count = self._update_errors_and_get_count(self.__unicode_error_count, self.__unicode_errors,
unicode_err)
def update_op_error(self, op_err):
self.__op_error_count = self._update_errors_and_get_count(self.__op_error_count, self.__op_errors, op_err)
class EventLogger(object):
def __init__(self):
self.event_dir = None
self.periodic_events = {}
#
# All events should have these parameters.
#
# The first set comes from the current OS and is initialized here. These values don't change during
# the agent's lifetime.
#
# The next two sets come from the goal state and IMDS and must be explicitly initialized using
# initialize_vminfo_common_parameters() once a protocol for communication with the host has been
# created. Their values don't change during the agent's lifetime. Note that we initialize these
# parameters here using dummy values (*_UNINITIALIZED) since events sent to the host should always
# match the schema defined for them in the telemetry pipeline.
#
# There is another set of common parameters that must be computed at the time the event is created
# (e.g. the timestamp and the container ID); those are added to events (along with the parameters
# below) in _add_common_event_parameters()
#
# Note that different kinds of events may also include other parameters; those are added by the
# corresponding add_* method (e.g. add_metric for performance metrics).
#
self._common_parameters = []
# Parameters from OS
osutil = get_osutil()
keyword_name = {
"CpuArchitecture": osutil.get_vm_arch()
}
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.OSVersion, EventLogger._get_os_version()))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.ExecutionMode, AGENT_EXECUTION_MODE))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.RAM, int(EventLogger._get_ram(osutil))))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.Processors, int(EventLogger._get_processors(osutil))))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.KeywordName, json.dumps(keyword_name)))
# Parameters from goal state
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.TenantName, "TenantName_UNINITIALIZED"))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.RoleName, "RoleName_UNINITIALIZED"))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.RoleInstanceName, "RoleInstanceName_UNINITIALIZED"))
#
# # Parameters from IMDS
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.Location, "Location_UNINITIALIZED"))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.SubscriptionId, "SubscriptionId_UNINITIALIZED"))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.ResourceGroupName, "ResourceGroupName_UNINITIALIZED"))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.VMId, "VMId_UNINITIALIZED"))
self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.ImageOrigin, 0))
@staticmethod
def _get_os_version():
return "{0}:{1}-{2}-{3}:{4}".format(platform.system(), DISTRO_NAME, DISTRO_VERSION, DISTRO_CODE_NAME, platform.release())
@staticmethod
def _get_ram(osutil):
try:
return osutil.get_total_mem()
except OSUtilError as e:
logger.warn("Failed to get RAM info; will be missing from telemetry: {0}", ustr(e))
return 0
@staticmethod
def _get_processors(osutil):
try:
return osutil.get_processor_cores()
except OSUtilError as e:
logger.warn("Failed to get Processors info; will be missing from telemetry: {0}", ustr(e))
return 0
def initialize_vminfo_common_parameters(self, protocol):
"""
Initializes the common parameters that come from the goal state and IMDS
"""
# create an index of the event parameters for faster updates
parameters = {}
for p in self._common_parameters:
parameters[p.name] = p
try:
vminfo = protocol.get_vminfo()
parameters[CommonTelemetryEventSchema.TenantName].value = vminfo.tenantName
parameters[CommonTelemetryEventSchema.RoleName].value = vminfo.roleName
parameters[CommonTelemetryEventSchema.RoleInstanceName].value = vminfo.roleInstanceName
except Exception as e:
logger.warn("Failed to get VM info from goal state; will be missing from telemetry: {0}", ustr(e))
try:
imds_client = get_imds_client(protocol.get_endpoint())
imds_info = imds_client.get_compute()
parameters[CommonTelemetryEventSchema.Location].value = imds_info.location
parameters[CommonTelemetryEventSchema.SubscriptionId].value = imds_info.subscriptionId
parameters[CommonTelemetryEventSchema.ResourceGroupName].value = imds_info.resourceGroupName
parameters[CommonTelemetryEventSchema.VMId].value = imds_info.vmId
parameters[CommonTelemetryEventSchema.ImageOrigin].value = int(imds_info.image_origin)
except Exception as e:
logger.warn("Failed to get IMDS info; will be missing from telemetry: {0}", ustr(e))
def save_event(self, data):
if self.event_dir is None:
logger.warn("Cannot save event -- Event reporter is not initialized.")
return
try:
fileutil.mkdir(self.event_dir, mode=0o700)
except (IOError, OSError) as e:
msg = "Failed to create events folder {0}. Error: {1}".format(self.event_dir, ustr(e))
raise EventError(msg)
try:
existing_events = os.listdir(self.event_dir)
if len(existing_events) >= MAX_NUMBER_OF_EVENTS:
logger.periodic_warn(logger.EVERY_MINUTE, "[PERIODIC] Too many files under: {0}, current count: {1}, "
"removing oldest event files".format(self.event_dir,
len(existing_events)))
existing_events.sort()
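# File names are microsecond timestamps, so sorting is oldest-first; keep only
# the newest (MAX_NUMBER_OF_EVENTS - 1) files to make room for the event about
# to be written.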
oldest_files = existing_events[:-999]
for event_file in oldest_files:
os.remove(os.path.join(self.event_dir, event_file))
except (IOError, OSError) as e:
msg = "Failed to remove old events from events folder {0}. Error: {1}".format(self.event_dir, ustr(e))
raise EventError(msg)
filename = os.path.join(self.event_dir,
ustr(int(time.time() * 1000000)))
try:
with open(filename + ".tmp", 'wb+') as hfile:
hfile.write(data.encode("utf-8"))
os.rename(filename + ".tmp", filename + AGENT_EVENT_FILE_EXTENSION)
except (IOError, OSError) as e:
msg = "Failed to write events to file: {0}".format(e)
raise EventError(msg)
def reset_periodic(self):
self.periodic_events = {}
def is_period_elapsed(self, delta, h):
return h not in self.periodic_events or \
(self.periodic_events[h] + delta) <= datetime.now()
def add_periodic(self, delta, name, op=WALAEventOperation.Unknown, is_success=True, duration=0,
version=str(CURRENT_VERSION), message="", log_event=True, force=False):
h = hash(name + op + ustr(is_success) + message)
if force or self.is_period_elapsed(delta, h):
self.add_event(name, op=op, is_success=is_success, duration=duration,
version=version, message=message, log_event=log_event)
self.periodic_events[h] = datetime.now()
def add_event(self, name, op=WALAEventOperation.Unknown, is_success=True, duration=0, version=str(CURRENT_VERSION),
message="", log_event=True):
if (not is_success) and log_event:
_log_event(name, op, message, duration, is_success=is_success)
event = TelemetryEvent(TELEMETRY_EVENT_EVENT_ID, TELEMETRY_EVENT_PROVIDER_ID)
event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Name, str_to_encoded_ustr(name)))
event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Version, str_to_encoded_ustr(version)))
event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Operation, str_to_encoded_ustr(op)))
event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.OperationSuccess, bool(is_success)))
event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Message, str_to_encoded_ustr(message)))
event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Duration, int(duration)))
self.add_common_event_parameters(event, datetime.utcnow())
data = get_properties(event)
try:
self.save_event(json.dumps(data))
except EventError as e:
logger.periodic_error(logger.EVERY_FIFTEEN_MINUTES, "[PERIODIC] {0}".format(ustr(e)))
def add_log_event(self, level, message):
event = TelemetryEvent(TELEMETRY_LOG_EVENT_ID, TELEMETRY_LOG_PROVIDER_ID)
event.parameters.append(TelemetryEventParam(GuestAgentGenericLogsSchema.EventName, WALAEventOperation.Log))
event.parameters.append(TelemetryEventParam(GuestAgentGenericLogsSchema.CapabilityUsed, logger.LogLevel.STRINGS[level]))
event.parameters.append(TelemetryEventParam(GuestAgentGenericLogsSchema.Context1, str_to_encoded_ustr(self._clean_up_message(message))))
event.parameters.append(TelemetryEventParam(GuestAgentGenericLogsSchema.Context2, datetime.utcnow().strftime(logger.Logger.LogTimeFormatInUTC)))
event.parameters.append(TelemetryEventParam(GuestAgentGenericLogsSchema.Context3, ''))
self.add_common_event_parameters(event, datetime.utcnow())
data = get_properties(event)
try:
self.save_event(json.dumps(data))
except EventError:
pass
def add_metric(self, category, counter, instance, value, log_event=False):
"""
Create and save an event which contains a telemetry event.
:param str category: The category of metric (e.g. "cpu", "memory")
:param str counter: The specific metric within the category (e.g. "%idle")
:param str instance: For instanced metrics, the instance identifier (filesystem name, cpu core#, etc.)
:param value: Value of the metric
:param bool log_event: If true, log the collected metric in the agent log
"""
if log_event:
message = "Metric {0}/{1} [{2}] = {3}".format(category, counter, instance, value)
_log_event(AGENT_NAME, "METRIC", message, 0)
event = TelemetryEvent(TELEMETRY_METRICS_EVENT_ID, TELEMETRY_EVENT_PROVIDER_ID)
event.parameters.append(TelemetryEventParam(GuestAgentPerfCounterEventsSchema.Category, str_to_encoded_ustr(category)))
event.parameters.append(TelemetryEventParam(GuestAgentPerfCounterEventsSchema.Counter, str_to_encoded_ustr(counter)))
event.parameters.append(TelemetryEventParam(GuestAgentPerfCounterEventsSchema.Instance, str_to_encoded_ustr(instance)))
event.parameters.append(TelemetryEventParam(GuestAgentPerfCounterEventsSchema.Value, float(value)))
self.add_common_event_parameters(event, datetime.utcnow())
data = get_properties(event)
try:
self.save_event(json.dumps(data))
except EventError as e:
logger.periodic_error(logger.EVERY_FIFTEEN_MINUTES, "[PERIODIC] {0}".format(ustr(e)))
@staticmethod
def _clean_up_message(message):
# By the time the message has gotten to this point it is formatted as
#
# Old Time format
# YYYY/MM/DD HH:mm:ss.fffffff LEVEL <text>.
# YYYY/MM/DD HH:mm:ss.fffffff <text>.
# YYYY/MM/DD HH:mm:ss LEVEL <text>.
# YYYY/MM/DD HH:mm:ss <text>.
#
# UTC ISO Time format added in #1716
# YYYY-MM-DDTHH:mm:ss.fffffffZ LEVEL <text>.
# YYYY-MM-DDTHH:mm:ss.fffffffZ <text>.
# YYYY-MM-DDTHH:mm:ssZ LEVEL <text>.
# YYYY-MM-DDTHH:mm:ssZ <text>.
#
# The timestamp and the level are redundant, and should be stripped. The logging library does not schematize
# this data, so I am forced to parse the message using a regex. The format is regular, so the burden is low,
# and usability on the telemetry side is high.
if not message:
return message
# Two regexes are used to keep the handling simple and maintainable. Most logs include the
# level in the message itself; for those that don't, the second regex is a catch-all that
# handles the remaining cases.
log_level_format_parser = re.compile(r"^.*(INFO|WARNING|ERROR|VERBOSE)\s*(.*)$")
log_format_parser = re.compile(r"^[0-9:/\-TZ\s.]*\s(.*)$")
# Parsing the log messages containing levels in it
extract_level_message = log_level_format_parser.search(message)
if extract_level_message:
return extract_level_message.group(2) # The message bit
else:
# Parsing the log messages without levels in it.
extract_message = log_format_parser.search(message)
if extract_message:
return extract_message.group(1) # The message bit
else:
return message
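# Example behavior (illustrative): both inputs below reduce to "Disk mounted":
#
#     "2021-01-01T00:00:00.000000Z INFO Disk mounted"  (first regex, group 2)
#     "2021/01/01 00:00:00 Disk mounted"               (second regex, group 1)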
def add_common_event_parameters(self, event, event_timestamp):
"""
This method is called for all events and ensures all telemetry fields are added before the event is sent out.
Note that the event timestamp is saved in the OpcodeName field.
"""
common_params = [TelemetryEventParam(CommonTelemetryEventSchema.GAVersion, CURRENT_AGENT),
TelemetryEventParam(CommonTelemetryEventSchema.ContainerId, AgentGlobals.get_container_id()),
TelemetryEventParam(CommonTelemetryEventSchema.OpcodeName, event_timestamp.strftime(logger.Logger.LogTimeFormatInUTC)),
TelemetryEventParam(CommonTelemetryEventSchema.EventTid, threading.current_thread().ident),
TelemetryEventParam(CommonTelemetryEventSchema.EventPid, os.getpid()),
TelemetryEventParam(CommonTelemetryEventSchema.TaskName, threading.current_thread().getName())]
if event.eventId == TELEMETRY_EVENT_EVENT_ID and event.providerId == TELEMETRY_EVENT_PROVIDER_ID:
# Currently only the GuestAgentExtensionEvents has these columns, the other tables dont have them so skipping
# this data in those tables.
common_params.extend([TelemetryEventParam(GuestAgentExtensionEventsSchema.ExtensionType, event.file_type),
TelemetryEventParam(GuestAgentExtensionEventsSchema.IsInternal, False)])
event.parameters.extend(common_params)
event.parameters.extend(self._common_parameters)
__event_logger__ = EventLogger()
def get_event_logger():
return __event_logger__
def elapsed_milliseconds(utc_start):
now = datetime.utcnow()
if now < utc_start:
return 0
d = now - utc_start
return int(((d.days * 24 * 60 * 60 + d.seconds) * 1000) + \
(d.microseconds / 1000.0))
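# Illustrative sketch (not part of the original module): measuring a duration
# for the `duration` parameter of add_event(); `do_work` is hypothetical.
#
#     start = datetime.utcnow()
#     do_work()
#     add_event(AGENT_NAME, op=WALAEventOperation.Install,
#               duration=elapsed_milliseconds(start))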
def report_event(op, is_success=True, message='', log_event=True):
add_event(AGENT_NAME,
version=str(CURRENT_VERSION),
is_success=is_success,
message=message,
op=op,
log_event=log_event)
def report_periodic(delta, op, is_success=True, message=''):
add_periodic(delta, AGENT_NAME,
version=str(CURRENT_VERSION),
is_success=is_success,
message=message,
op=op)
def report_metric(category, counter, instance, value, log_event=False, reporter=__event_logger__):
"""
Send a telemetry event reporting a single instance of a performance counter.
:param str category: The category of the metric (cpu, memory, etc)
:param str counter: The name of the metric ("%idle", etc)
:param str instance: For instanced metrics, the identifier of the instance. E.g. a disk drive name, a cpu core#
:param value: The value of the metric
:param bool log_event: If True, log the metric in the agent log as well
:param EventLogger reporter: The EventLogger instance to which metric events should be sent
"""
if reporter.event_dir is None:
logger.warn("Cannot report metric event -- Event reporter is not initialized.")
message = "Metric {0}/{1} [{2}] = {3}".format(category, counter, instance, value)
_log_event(AGENT_NAME, "METRIC", message, 0)
return
try:
reporter.add_metric(category, counter, instance, float(value), log_event)
except ValueError:
logger.periodic_warn(logger.EVERY_HALF_HOUR, "[PERIODIC] Cannot cast the metric value. Details of the Metric - "
"{0}/{1} [{2}] = {3}".format(category, counter, instance, value))
def initialize_event_logger_vminfo_common_parameters(protocol, reporter=__event_logger__):
reporter.initialize_vminfo_common_parameters(protocol)
def add_event(name=AGENT_NAME, op=WALAEventOperation.Unknown, is_success=True, duration=0, version=str(CURRENT_VERSION),
message="", log_event=True, reporter=__event_logger__):
if reporter.event_dir is None:
logger.warn("Cannot add event -- Event reporter is not initialized.")
_log_event(name, op, message, duration, is_success=is_success)
return
if should_emit_event(name, version, op, is_success):
mark_event_status(name, version, op, is_success)
reporter.add_event(name, op=op, is_success=is_success, duration=duration, version=str(version),
message=message,
log_event=log_event)
def add_log_event(level, message, forced=False, reporter=__event_logger__):
"""
:param level: LoggerLevel of the log event
:param message: Message
:param forced: Force write the event even if send_logs_to_telemetry() is disabled
(NOTE: Remove this flag once send_logs_to_telemetry() is enabled for all events)
:param reporter:
:return:
"""
if reporter.event_dir is None:
return
if not (forced or send_logs_to_telemetry()):
return
if level >= logger.LogLevel.WARNING:
reporter.add_log_event(level, message)
def add_periodic(delta, name, op=WALAEventOperation.Unknown, is_success=True, duration=0, version=str(CURRENT_VERSION),
message="", log_event=True, force=False, reporter=__event_logger__):
if reporter.event_dir is None:
logger.warn("Cannot add periodic event -- Event reporter is not initialized.")
_log_event(name, op, message, duration, is_success=is_success)
return
reporter.add_periodic(delta, name, op=op, is_success=is_success, duration=duration, version=str(version),
message=message, log_event=log_event, force=force)
def mark_event_status(name, version, op, status):
if op in __event_status_operations__:
__event_status__.mark_event_status(name, version, op, status)
def should_emit_event(name, version, op, status):
return \
op not in __event_status_operations__ or \
__event_status__ is None or \
not __event_status__.event_marked(name, version, op) or \
__event_status__.event_succeeded(name, version, op) != status
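# Illustrative note (not part of the original module): for operations tracked in
# __event_status_operations__ (currently only ReportStatus), an event is emitted
# only when its success/failure state changes. For example:
#
#     add_event(AGENT_NAME, op=WALAEventOperation.ReportStatus, is_success=True)   # emitted
#     add_event(AGENT_NAME, op=WALAEventOperation.ReportStatus, is_success=True)   # suppressed
#     add_event(AGENT_NAME, op=WALAEventOperation.ReportStatus, is_success=False)  # emitted again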
def init_event_logger(event_dir):
__event_logger__.event_dir = event_dir
def init_event_status(status_dir):
__event_status__.initialize(status_dir)
def dump_unhandled_err(name):
if hasattr(sys, 'last_type') and hasattr(sys, 'last_value') and \
hasattr(sys, 'last_traceback'):
last_type = getattr(sys, 'last_type')
last_value = getattr(sys, 'last_value')
last_traceback = getattr(sys, 'last_traceback')
error = traceback.format_exception(last_type, last_value,
last_traceback)
message = "".join(error)
add_event(name, is_success=False, message=message,
op=WALAEventOperation.UnhandledError)
def enable_unhandled_err_dump(name):
atexit.register(dump_unhandled_err, name)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/exception.py 0000664 0000000 0000000 00000017715 14626177470 0024630 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
"""
Defines all exceptions
"""
class ExitException(BaseException):
"""
Used to exit the agent's process
"""
def __init__(self, reason):
super(ExitException, self).__init__()
self.reason = reason
class AgentUpgradeExitException(ExitException):
"""
Used to exit the agent's process due to Agent Upgrade
"""
class AgentError(Exception):
"""
Base class of agent error.
"""
def __init__(self, msg, inner=None):
msg = u"[{0}] {1}".format(type(self).__name__, msg)
if inner is not None:
msg = u"{0}\nInner error: {1}".format(msg, inner)
super(AgentError, self).__init__(msg)
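# Illustrative note (not part of the original module): the formatting above
# means that, e.g., ProtocolError("timed out", inner=IOError("network down"))
# carries the message "[ProtocolError] timed out\nInner error: network down".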
class AgentConfigError(AgentError):
"""
When configure file is not found or malformed.
"""
def __init__(self, msg=None, inner=None):
super(AgentConfigError, self).__init__(msg, inner)
class AgentMemoryExceededException(AgentError):
"""
When Agent memory limit reached.
"""
def __init__(self, msg=None, inner=None):
super(AgentMemoryExceededException, self).__init__(msg, inner)
class AgentNetworkError(AgentError):
"""
When network is not available.
"""
def __init__(self, msg=None, inner=None):
super(AgentNetworkError, self).__init__(msg, inner)
class AgentUpdateError(AgentError):
"""
When agent failed to update.
"""
def __init__(self, msg=None, inner=None):
super(AgentUpdateError, self).__init__(msg, inner)
class AgentFamilyMissingError(AgentError):
"""
When agent family is missing.
"""
def __init__(self, msg=None, inner=None):
super(AgentFamilyMissingError, self).__init__(msg, inner)
class CGroupsException(AgentError):
"""
Exception to classify any cgroups related issue.
"""
def __init__(self, msg=None, inner=None):
super(CGroupsException, self).__init__(msg, inner)
class ExtensionError(AgentError):
"""
When failed to execute an extension
"""
def __init__(self, msg=None, inner=None, code=-1):
super(ExtensionError, self).__init__(msg, inner)
self.code = code
class ExtensionOperationError(ExtensionError):
"""
When the command times out or returns with a non-zero exit_code
"""
def __init__(self, msg=None, inner=None, code=-1, exit_code=-1):
super(ExtensionOperationError, self).__init__(msg, inner)
self.code = code
self.exit_code = exit_code
class ExtensionUpdateError(ExtensionError):
"""
Error raised when failed to update an extension
"""
class ExtensionDownloadError(ExtensionError):
"""
Error raised when failed to download and setup an extension
"""
class ExtensionsGoalStateError(ExtensionError):
"""
Error raised when the ExtensionsGoalState is malformed
"""
class ExtensionsConfigError(ExtensionsGoalStateError):
"""
Error raised when the ExtensionsConfig is malformed
"""
class MultiConfigExtensionEnableError(ExtensionError):
"""
Error raised when enable for a Multi-Config extension is failing.
"""
class ProvisionError(AgentError):
"""
When provision failed
"""
def __init__(self, msg=None, inner=None):
super(ProvisionError, self).__init__(msg, inner)
class ResourceDiskError(AgentError):
"""
Mount resource disk failed
"""
def __init__(self, msg=None, inner=None):
super(ResourceDiskError, self).__init__(msg, inner)
class DhcpError(AgentError):
"""
Failed to handle dhcp response
"""
def __init__(self, msg=None, inner=None):
super(DhcpError, self).__init__(msg, inner)
class OSUtilError(AgentError):
"""
Failed to perform operation to OS configuration
"""
def __init__(self, msg=None, inner=None):
super(OSUtilError, self).__init__(msg, inner)
class ProtocolError(AgentError):
"""
Azure protocol error
"""
def __init__(self, msg=None, inner=None):
super(ProtocolError, self).__init__(msg, inner)
class ProtocolNotFoundError(ProtocolError):
"""
Error raised when Azure protocol endpoint not found
"""
class HttpError(AgentError):
"""
Http request failure
"""
def __init__(self, msg=None, inner=None):
super(HttpError, self).__init__(msg, inner)
class InvalidContainerError(HttpError):
"""
Error raised when Container id sent in the header is invalid
"""
class EventError(AgentError):
"""
Event reporting error
"""
def __init__(self, msg=None, inner=None):
super(EventError, self).__init__(msg, inner)
class CryptError(AgentError):
"""
Encrypt/Decrypt error
"""
def __init__(self, msg=None, inner=None):
super(CryptError, self).__init__(msg, inner)
class UpdateError(AgentError):
"""
Update Guest Agent error
"""
def __init__(self, msg=None, inner=None):
super(UpdateError, self).__init__(msg, inner)
class ResourceGoneError(HttpError):
"""
The requested resource no longer exists (i.e., status code 410)
"""
def __init__(self, msg=None, inner=None):
if msg is None:
msg = "Resource is gone"
super(ResourceGoneError, self).__init__(msg, inner)
class InvalidExtensionEventError(AgentError):
"""
Error thrown when an extension telemetry event is invalid per the contract defined with extensions.
"""
# Types of InvalidExtensionEventError
MissingKeyError = "MissingKeyError"
EmptyMessageError = "EmptyMessageError"
OversizeEventError = "OversizeEventError"
def __init__(self, msg=None, inner=None):
super(InvalidExtensionEventError, self).__init__(msg, inner)
class ServiceStoppedError(AgentError):
"""
Error thrown when trying to access a Service which is stopped
"""
def __init__(self, msg=None, inner=None):
super(ServiceStoppedError, self).__init__(msg, inner)
class ExtensionErrorCodes(object):
"""
Common error codes used by the Compute RP for better understanding
the cause of, and for classifying, commonly occurring errors
"""
# Unknown Failures
PluginUnknownFailure = -1
# Success
PluginSuccess = 0
# Catch all error code.
PluginProcessingError = 1000
# Plugin failed to download
PluginManifestDownloadError = 1001
# Cannot find or load successfully the HandlerManifest.json
PluginHandlerManifestNotFound = 1002
# Cannot successfully serialize the HandlerManifest.json
PluginHandlerManifestDeserializationError = 1003
# Cannot download the plugin package
PluginPackageDownloadFailed = 1004
# Cannot extract the plugin from the package
PluginPackageExtractionFailed = 1005
# Install failed
PluginInstallProcessingFailed = 1007
# Update failed
PluginUpdateProcessingFailed = 1008
# Enable failed
PluginEnableProcessingFailed = 1009
# Disable failed
PluginDisableProcessingFailed = 1010
# Extension script timed out
PluginHandlerScriptTimedout = 1011
# Invalid status file of the extension.
PluginSettingsStatusInvalid = 1012
def __init__(self):
pass
class GoalStateAggregateStatusCodes(object):
# Success
Success = 0
# Unknown failure
GoalStateUnknownFailure = -1
# The goal state requires features that are not supported by this version of the VM agent
GoalStateUnsupportedRequiredFeatures = 2001
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/future.py 0000664 0000000 0000000 00000014467 14626177470 0024145 0 ustar 00root root 0000000 0000000 import contextlib
import platform
import sys
import os
import re
# Note: a failed import of `distro` is deliberately tolerated here to avoid
# potential backward-compatibility issues on different distributions
try:
import distro # pylint: disable=E0401
except Exception:
pass
# pylint: disable=W0105
"""
Add alias for python2 and python3 libs and functions.
"""
# pylint: enable=W0105
if sys.version_info[0] == 3:
import http.client as httpclient # pylint: disable=W0611,import-error
from urllib.parse import urlparse # pylint: disable=W0611,import-error,no-name-in-module
"""Rename Python3 str to ustr""" # pylint: disable=W0105
ustr = str
bytebuffer = memoryview
# We aren't using these imports in this file, but we want them to be available
# to import from this module in others.
# Additionally, python2 doesn't have this, so we need to disable import-error
# as well.
# unused-import, import-error Disabled: Due to backward compatibility between py2 and py3
from builtins import int, range # pylint: disable=unused-import,import-error
from collections import OrderedDict # pylint: disable=W0611
from queue import Queue, Empty # pylint: disable=W0611,import-error
# unused-import Disabled: python2.7 doesn't have subprocess.DEVNULL
# so this import is only used by python3.
import subprocess # pylint: disable=unused-import
elif sys.version_info[0] == 2:
import httplib as httpclient # pylint: disable=E0401,W0611
from urlparse import urlparse # pylint: disable=E0401
from Queue import Queue, Empty # pylint: disable=W0611,import-error
# We want to suppress the following:
# - undefined-variable:
# These builtins are not defined in python3
# - redefined-builtin:
# This is intentional, so that code that wants to use builtins we're
# assigning new names to doesn't need to check python versions before
# doing so.
# pylint: disable=undefined-variable,redefined-builtin
ustr = unicode # Rename Python2 unicode to ustr
bytebuffer = buffer
range = xrange
int = long
if sys.version_info[1] >= 7:
from collections import OrderedDict # For Py 2.7+
else:
from ordereddict import OrderedDict # Works only on 2.6 # pylint: disable=E0401
else:
raise ImportError("Unknown python version: {0}".format(sys.version_info))
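# Illustrative sketch (not part of the original module): with the aliases
# above, version-agnostic code can be written once for both Python 2 and 3:
#
#     from azurelinuxagent.common.future import ustr, httpclient, urlparse
#     parsed = urlparse("http://example.com/machine?comp=goalstate")
#     connection = httpclient.HTTPConnection(parsed.hostname)
#     text = ustr(b"some bytes", encoding="utf-8")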
def get_linux_distribution(get_full_name, supported_dists):
"""Abstract platform.linux_distribution() call which is deprecated as of
Python 3.5 and removed in Python 3.7"""
try:
supported = platform._supported_dists + (supported_dists,)
osinfo = list(
platform.linux_distribution( # pylint: disable=W1505
full_distribution_name=get_full_name,
supported_dists=supported
)
)
# The platform.linux_distribution() lib has issue with detecting OpenWRT linux distribution.
# Merge the following patch provided by OpenWRT as a temporary fix.
if os.path.exists("/etc/openwrt_release"):
osinfo = get_openwrt_platform()
if not osinfo or osinfo == ['', '', '']:
return get_linux_distribution_from_distro(get_full_name)
full_name = platform.linux_distribution()[0].strip() # pylint: disable=W1505
osinfo.append(full_name)
except AttributeError:
return get_linux_distribution_from_distro(get_full_name)
return osinfo
def get_linux_distribution_from_distro(get_full_name):
"""Get the distribution information from the distro Python module."""
# If we get here we have to have the distro module, thus we do
# not wrap the call in a try-except block as it would mask the problem
# and result in a broken agent installation
osinfo = list(
distro.linux_distribution(
full_distribution_name=get_full_name
)
)
full_name = distro.linux_distribution()[0].strip()
osinfo.append(full_name)
# Fixes https://github.com/Azure/WALinuxAgent/issues/2715: distro.linux_distribution() does not return the full version.
# If best is true, the most precise version number out of all examined sources is returned.
if "mariner" in osinfo[0].lower():
osinfo[1] = distro.version(best=True)
return osinfo
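# Illustrative note (not part of the original module): example return value on
# an Ubuntu 18.04 system (values are illustrative):
#
#     get_linux_distribution(get_full_name=False, supported_dists="ubuntu")
#     -> ['Ubuntu', '18.04', 'bionic', 'Ubuntu']
#
# i.e. [name, version, codename] from platform/distro, with the full
# distribution name appended as the fourth element.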
def get_openwrt_platform():
"""
Add this workaround for detecting OpenWRT products because
the version and product information is contained in the /etc/openwrt_release file.
"""
result = [None, None, None]
openwrt_version = re.compile(r"^DISTRIB_RELEASE=['\"](\d+\.\d+\.\d+)['\"]")
openwrt_product = re.compile(r"^DISTRIB_ID=['\"]([\w-]+)['\"]")
with open('/etc/openwrt_release', 'r') as fh:
content = fh.readlines()
for line in content:
version_matches = openwrt_version.match(line)
product_matches = openwrt_product.match(line)
if version_matches:
result[1] = version_matches.group(1)
elif product_matches:
if product_matches.group(1) == "OpenWrt":
result[0] = "openwrt"
return result
def is_file_not_found_error(exception):
# pylint for python2 complains, but FileNotFoundError is
# defined for python3.
# pylint: disable=undefined-variable
if sys.version_info[0] == 2:
# Python 2 uses OSError(errno=2)
return isinstance(exception, OSError) and exception.errno == 2
elif sys.version_info[0] == 3:
return isinstance(exception, FileNotFoundError)
return isinstance(exception, FileNotFoundError)
@contextlib.contextmanager
def subprocess_dev_null():
if sys.version_info[0] == 3:
# Suppress no-member errors on python2.7
yield subprocess.DEVNULL # pylint: disable=no-member
else:
devnull = None  # initialize so the finally clause is safe if open() fails
try:
devnull = open(os.devnull, "a+")
yield devnull
except Exception:
yield None
finally:
if devnull is not None:
devnull.close()
def array_to_bytes(buff):
# Python 3.9 removed the tostring() method on arrays, the new alias is tobytes()
if sys.version_info[0] == 2:
return buff.tostring()
if sys.version_info[0] == 3 and sys.version_info[1] <= 8:
return buff.tostring()
return buff.tobytes()
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/logger.py 0000664 0000000 0000000 00000026656 14626177470 0024115 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and openssl_bin 1.0+
#
"""
Log utils
"""
import sys
from datetime import datetime, timedelta
from threading import currentThread
from azurelinuxagent.common.future import ustr
EVERY_DAY = timedelta(days=1)
EVERY_HALF_DAY = timedelta(hours=12)
EVERY_SIX_HOURS = timedelta(hours=6)
EVERY_HOUR = timedelta(hours=1)
EVERY_HALF_HOUR = timedelta(minutes=30)
EVERY_FIFTEEN_MINUTES = timedelta(minutes=15)
EVERY_MINUTE = timedelta(minutes=1)
class Logger(object):
"""
Logger class
"""
# This format is based on ISO-8601, Z represents UTC (Zero offset)
LogTimeFormatInUTC = u'%Y-%m-%dT%H:%M:%S.%fZ'
def __init__(self, logger=None, prefix=None):
self.appenders = []
self.logger = self if logger is None else logger
self.periodic_messages = {}
self.prefix = prefix
self.silent = False
def reset_periodic(self):
self.logger.periodic_messages = {}
def set_prefix(self, prefix):
self.prefix = prefix
def _is_period_elapsed(self, delta, h):
return h not in self.logger.periodic_messages or \
(self.logger.periodic_messages[h] + delta) <= datetime.now()
def _periodic(self, delta, log_level_op, msg_format, *args):
h = hash(msg_format)
if self._is_period_elapsed(delta, h):
log_level_op(msg_format, *args)
self.logger.periodic_messages[h] = datetime.now()
def periodic_info(self, delta, msg_format, *args):
self._periodic(delta, self.info, msg_format, *args)
def periodic_verbose(self, delta, msg_format, *args):
self._periodic(delta, self.verbose, msg_format, *args)
def periodic_warn(self, delta, msg_format, *args):
self._periodic(delta, self.warn, msg_format, *args)
def periodic_error(self, delta, msg_format, *args):
self._periodic(delta, self.error, msg_format, *args)
def verbose(self, msg_format, *args):
self.log(LogLevel.VERBOSE, msg_format, *args)
def info(self, msg_format, *args):
self.log(LogLevel.INFO, msg_format, *args)
def warn(self, msg_format, *args):
self.log(LogLevel.WARNING, msg_format, *args)
def error(self, msg_format, *args):
self.log(LogLevel.ERROR, msg_format, *args)
def log(self, level, msg_format, *args):
def write_log(log_appender): # pylint: disable=W0612
"""
The appender_lock flag is used to signal if the logger is currently in use. This prevents a subsequent log
coming in due to writing of a log statement to be not written.
Eg:
Assuming a logger with two appenders - FileAppender and TelemetryAppender. Here is an example of
how using appender_lock flag can help.
logger.warn("foo")
|- log.warn() (azurelinuxagent.common.logger.Logger.warn)
|- log() (azurelinuxagent.common.logger.Logger.log)
|- FileAppender.appender_lock is currently False not log_appender.appender_lock is True
|- We sets it to True.
|- FileAppender.write completes.
|- FileAppender.appender_lock sets to False.
|- TelemetryAppender.appender_lock is currently False not log_appender.appender_lock is True
|- We sets it to True.
[A] |- TelemetryAppender.write gets called but has an error and writes a log.warn("bar")
|- log() (azurelinuxagent.common.logger.Logger.log)
|- FileAppender.appender_lock is set to True (log_appender.appender_lock was false when entering).
|- FileAppender.write completes.
|- FileAppender.appender_lock sets to False.
|- TelemetryAppender.appender_lock is already True, not log_appender.appender_lock is False
Thus [A] cannot happen again if TelemetryAppender.write is not getting called. It prevents
faulty appenders to not get called again and again.
:param log_appender: Appender
:return: None
"""
if not log_appender.appender_lock:
try:
log_appender.appender_lock = True
log_appender.write(level, log_item)
finally:
log_appender.appender_lock = False
if self.silent:
return
# if msg_format is not unicode convert it to unicode
if type(msg_format) is not ustr:
msg_format = ustr(msg_format, errors="backslashreplace")
if len(args) > 0:
msg = msg_format.format(*args)
else:
msg = msg_format
time = datetime.utcnow().strftime(Logger.LogTimeFormatInUTC)
level_str = LogLevel.STRINGS[level]
thread_name = currentThread().getName()
if self.prefix is not None:
log_item = u"{0} {1} {2} {3} {4}\n".format(time, level_str, thread_name, self.prefix, msg)
else:
log_item = u"{0} {1} {2} {3}\n".format(time, level_str, thread_name, msg)
log_item = ustr(log_item.encode('ascii', "backslashreplace"),
encoding="ascii")
for appender in self.appenders:
appender.write(level, log_item)
#
# TODO: we should actually call
#
# write_log(appender)
#
# (see PR #1659). Before doing that, write_log needs to be thread-safe.
#
# This needs to be done when SEND_LOGS_TO_TELEMETRY is enabled.
#
if self.logger != self:
for appender in self.logger.appenders:
appender.write(level, log_item)
#
# TODO: call write_log instead (see comment above)
#
def add_appender(self, appender_type, level, path):
appender = _create_logger_appender(appender_type, level, path)
self.appenders.append(appender)
def console_output_enabled(self):
"""
Returns True if the current list of appenders includes at least one ConsoleAppender
"""
return any(isinstance(appender, ConsoleAppender) for appender in self.appenders)
def disable_console_output(self):
"""
Removes all ConsoleAppenders from the current list of appenders
"""
self.appenders = [appender for appender in self.appenders if not isinstance(appender, ConsoleAppender)]
class Appender(object):
def __init__(self, level):
self.appender_lock = False
self.level = level
def write(self, level, msg):
pass
class ConsoleAppender(Appender):
def __init__(self, level, path):
super(ConsoleAppender, self).__init__(level)
self.path = path
def write(self, level, msg):
if self.level <= level:
try:
with open(self.path, "w") as console:
console.write(msg)
except IOError:
pass
class FileAppender(Appender):
def __init__(self, level, path):
super(FileAppender, self).__init__(level)
self.path = path
def write(self, level, msg):
if self.level <= level:
try:
with open(self.path, "a+") as log_file:
log_file.write(msg)
except IOError:
pass
class StdoutAppender(Appender):
def __init__(self, level): # pylint: disable=W0235
super(StdoutAppender, self).__init__(level)
def write(self, level, msg):
if self.level <= level:
try:
sys.stdout.write(msg)
except IOError:
pass
class TelemetryAppender(Appender):
def __init__(self, level, event_func):
super(TelemetryAppender, self).__init__(level)
self.event_func = event_func
def write(self, level, msg):
if self.level <= level:
try:
self.event_func(level, msg)
except IOError:
pass
# Initialize logger instance
DEFAULT_LOGGER = Logger()
class LogLevel(object):
VERBOSE = 0
INFO = 1
WARNING = 2
ERROR = 3
STRINGS = [
"VERBOSE",
"INFO",
"WARNING",
"ERROR"
]
class AppenderType(object):
FILE = 0
CONSOLE = 1
STDOUT = 2
TELEMETRY = 3
def add_logger_appender(appender_type, level=LogLevel.INFO, path=None):
DEFAULT_LOGGER.add_appender(appender_type, level, path)
def console_output_enabled():
return DEFAULT_LOGGER.console_output_enabled()
def disable_console_output():
DEFAULT_LOGGER.disable_console_output()
def reset_periodic():
DEFAULT_LOGGER.reset_periodic()
def set_prefix(prefix):
DEFAULT_LOGGER.set_prefix(prefix)
def periodic_info(delta, msg_format, *args):
"""
The hash-map maintaining the state of the logs gets reset here -
azurelinuxagent.ga.monitor.MonitorHandler.reset_loggers. The current time period is defined by RESET_LOGGERS_PERIOD.
"""
DEFAULT_LOGGER.periodic_info(delta, msg_format, *args)
def periodic_verbose(delta, msg_format, *args):
"""
The hash-map maintaining the state of the logs gets reset here -
azurelinuxagent.ga.monitor.MonitorHandler.reset_loggers. The current time period is defined by RESET_LOGGERS_PERIOD.
"""
DEFAULT_LOGGER.periodic_verbose(delta, msg_format, *args)
def periodic_error(delta, msg_format, *args):
"""
The hash-map maintaining the state of the logs gets reset here -
azurelinuxagent.ga.monitor.MonitorHandler.reset_loggers. The current time period is defined by RESET_LOGGERS_PERIOD.
"""
DEFAULT_LOGGER.periodic_error(delta, msg_format, *args)
def periodic_warn(delta, msg_format, *args):
"""
The hash-map maintaining the state of the logs gets reset here -
azurelinuxagent.ga.monitor.MonitorHandler.reset_loggers. The current time period is defined by RESET_LOGGERS_PERIOD.
"""
DEFAULT_LOGGER.periodic_warn(delta, msg_format, *args)
def verbose(msg_format, *args):
DEFAULT_LOGGER.verbose(msg_format, *args)
def info(msg_format, *args):
DEFAULT_LOGGER.info(msg_format, *args)
def warn(msg_format, *args):
DEFAULT_LOGGER.warn(msg_format, *args)
def error(msg_format, *args):
DEFAULT_LOGGER.error(msg_format, *args)
def log(level, msg_format, *args):
DEFAULT_LOGGER.log(level, msg_format, *args)
def _create_logger_appender(appender_type, level=LogLevel.INFO, path=None):
if appender_type == AppenderType.CONSOLE:
return ConsoleAppender(level, path)
elif appender_type == AppenderType.FILE:
return FileAppender(level, path)
elif appender_type == AppenderType.STDOUT:
return StdoutAppender(level)
elif appender_type == AppenderType.TELEMETRY:
return TelemetryAppender(level, path)
else:
raise ValueError("Unknown appender type")
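# Illustrative sketch (not part of the original module): typical wiring of the
# default logger at start-up; the paths below are example values.
def _example_logger_setup():
    add_logger_appender(AppenderType.FILE, LogLevel.INFO, path="/var/log/waagent.log")
    add_logger_appender(AppenderType.CONSOLE, LogLevel.INFO, path="/dev/console")
    add_logger_appender(AppenderType.STDOUT, LogLevel.VERBOSE)
    info("Azure Linux Agent started")  # fans out to every appender at or below INFO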
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/ 0000775 0000000 0000000 00000000000 14626177470 0023564 5 ustar 00root root 0000000 0000000 Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/__init__.py 0000664 0000000 0000000 00000001263 14626177470 0025677 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.common.osutil.factory import get_osutil
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/alpine.py 0000664 0000000 0000000 00000003163 14626177470 0025411 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class AlpineOSUtil(DefaultOSUtil):
def __init__(self):
super(AlpineOSUtil, self).__init__()
self.agent_conf_file_path = '/etc/waagent.conf'
self.jit_enabled = True
def is_dhcp_enabled(self):
return True
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "dhcpcd"])
def restart_if(self, ifname, retries=None, wait=None):
logger.info('restarting {} (sort of, actually SIGHUPing dhcpcd)'.format(ifname))
pid = self.get_dhcp_pid()
if pid is not None:
ret = shellutil.run_get_output('kill -HUP {}'.format(pid)) # pylint: disable=W0612
def set_ssh_client_alive_interval(self):
# Alpine will handle this.
pass
def conf_sshd(self, disable_password):
# Alpine will handle this.
pass
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/arch.py 0000664 0000000 0000000 00000004161 14626177470 0025055 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class ArchUtil(DefaultOSUtil):
def __init__(self):
super(ArchUtil, self).__init__()
self.jit_enabled = True
@staticmethod
def get_systemd_unit_file_install_path():
return "/usr/lib/systemd/system"
@staticmethod
def get_agent_bin_path():
return "/usr/bin"
def is_dhcp_enabled(self):
return True
def start_network(self):
return shellutil.run("systemctl start systemd-networkd", chk_err=False)
def restart_if(self, ifname=None, retries=None, wait=None):
shellutil.run("systemctl restart systemd-networkd")
def restart_ssh_service(self):
# SSH is socket-activated, so there is no need to restart it.
pass
def stop_dhcp_service(self):
return shellutil.run("systemctl stop systemd-networkd", chk_err=False)
def start_dhcp_service(self):
return shellutil.run("systemctl start systemd-networkd", chk_err=False)
def start_agent_service(self):
return shellutil.run("systemctl start {0}".format(self.service_name), chk_err=False)
def stop_agent_service(self):
return shellutil.run("systemctl stop {0}".format(self.service_name), chk_err=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "systemd-networkd"])
def conf_sshd(self, disable_password):
# Don't whack the system default sshd conf
pass
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/bigip.py 0000664 0000000 0000000 00000033173 14626177470 0025237 0 ustar 00root root 0000000 0000000 # Copyright 2016 F5 Networks Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import array
import fcntl
import os
import platform
import re
import socket
import struct
import time
from azurelinuxagent.common.future import array_to_bytes
try:
# WAAgent > 2.1.3
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.exception import OSUtilError
from azurelinuxagent.common.osutil.default import DefaultOSUtil
except ImportError:
# WAAgent <= 2.1.3
import azurelinuxagent.logger as logger
import azurelinuxagent.utils.shellutil as shellutil
from azurelinuxagent.exception import OSUtilError
from azurelinuxagent.distro.default.osutil import DefaultOSUtil
class BigIpOSUtil(DefaultOSUtil):
def __init__(self): # pylint: disable=W0235
super(BigIpOSUtil, self).__init__()
def _wait_until_mcpd_is_initialized(self):
"""Wait for mcpd to become available
All configuration happens in mcpd, so we need to wait until it is
available before we provision the system. I call this method at the
first opportunity I have (during the DVD mounting call). This ensures
that the rest of the provisioning does not need to wait for mcpd to
be available unless it absolutely wants to.
:return bool: Returns True upon success
:raises OSUtilError: Raises exception if mcpd does not come up within
roughly 50 minutes (100 * 30 seconds)
"""
for retries in range(1, 100): # pylint: disable=W0612
# Retry until mcpd completes startup:
logger.info("Checking to see if mcpd is up")
rc = shellutil.run("/usr/bin/tmsh -a show sys mcp-state field-fmt 2>/dev/null | grep phase | grep running", chk_err=False)
if rc == 0:
logger.info("mcpd is up!")
break
time.sleep(30)
if rc == 0:
return True
raise OSUtilError(
"mcpd hasn't completed initialization! Cannot proceed!"
)
def _save_sys_config(self):
cmd = "/usr/bin/tmsh save sys config"
rc = shellutil.run(cmd)
if rc != 0:
logger.error("WARNING: Cannot save sys config on 1st boot.")
return rc
def restart_ssh_service(self):
return shellutil.run("/usr/bin/bigstart restart sshd", chk_err=False)
def stop_agent_service(self):
return shellutil.run("/sbin/service {0} stop".format(self.service_name), chk_err=False)
def start_agent_service(self):
return shellutil.run("/sbin/service {0} start".format(self.service_name), chk_err=False)
def register_agent_service(self):
return shellutil.run("/sbin/chkconfig --add {0}".format(self.service_name), chk_err=False)
def unregister_agent_service(self):
return shellutil.run("/sbin/chkconfig --del {0}".format(self.service_name), chk_err=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(["/sbin/pidof", "dhclient"])
def set_hostname(self, hostname):
"""Set the static hostname of the device
Normally, tmsh is used to set the hostname for the system. For our
purposes at this time though, I would hesitate to trust this function.
Azure(Stack) uses the name that you provide in the Web UI or ARM (for
example) as the value of the hostname argument to this method. The
problem is that there is nowhere in the UI that specifies the
restrictions and checks that tmsh has for the hostname.
For example, if you set the name "bigip1" in the Web UI, Azure(Stack)
considers that a perfectly valid name. When WAAgent gets around to
running though, tmsh will reject that value because it is not a fully
qualified domain name. The proper value should have been bigip.xxx.yyy
WAAgent will not fail if this command fails, but the hostname will not
be what the user set either. Currently we do not set the hostname when
WAAgent starts up, so I am passing on setting it here too.
:param hostname: The hostname to set on the device
"""
return None
def set_dhcp_hostname(self, hostname):
"""Sets the DHCP hostname
See `set_hostname` for an explanation of why I pass here
:param hostname: The hostname to set on the device
"""
return None
def useradd(self, username, expiration=None, comment=None):
"""Create user account using tmsh
Our policy is to create two accounts when booting a BIG-IP instance.
The first account is the one that the user specified when they did
the instance creation. The second one is the admin account that is,
or should be, built in to the system.
:param username: The username that you want to add to the system
:param expiration: The expiration date to use. We do not use this
value.
:param comment: description of the account. We do not use this value.
"""
if self.get_userentry(username):
logger.info("User {0} already exists, skip useradd", username)
return None
cmd = ['/usr/bin/tmsh', 'create', 'auth', 'user', username, 'partition-access', 'add', '{', 'all-partitions',
'{', 'role', 'admin', '}', '}', 'shell', 'bash']
self._run_command_raising_OSUtilError(cmd, err_msg="Failed to create user account:{0}".format(username))
self._save_sys_config()
return 0
def chpasswd(self, username, password, crypt_id=6, salt_len=10):
"""Change a user's password with tmsh
Since we are creating the user specified account and additionally
changing the password of the built-in 'admin' account, both must
be modified in this method.
Note that the default method also checks for a "system level" of the
user; based on the value of UID_MIN in /etc/login.defs. In our env,
all user accounts have the UID 0. So we can't rely on this value.
:param username: The username whose password to change
:param password: The unencrypted password to set for the user
:param crypt_id: If encrypting the password, the crypt_id that was used
:param salt_len: If encrypting the password, the length of the salt
value used to do it.
"""
# Start by setting the password of the user provided account
self._run_command_raising_OSUtilError(
['/usr/bin/tmsh', 'modify', 'auth', 'user', username, 'password', password],
err_msg="Failed to set password for {0}".format(username))
# Next, set the password of the built-in 'admin' account to have
# the same password as the user provided account
userentry = self.get_userentry('admin')
if userentry is None:
raise OSUtilError("The 'admin' user account was not found!")
self._run_command_raising_OSUtilError(
['/usr/bin/tmsh', 'modify', 'auth', 'user', 'admin', 'password', password],
err_msg="Failed to set password for admin")
self._save_sys_config()
return 0
def del_account(self, username):
"""Deletes a user account.
Note that the default method also checks for a "system level" of the
user; based on the value of UID_MIN in /etc/login.defs. In our env,
all user accounts have the UID 0. So we can't rely on this value.
We also don't use sudo, so we remove that method call as well.
:param username:
:return:
"""
self._run_command_without_raising(["touch", "/var/run/utmp"])
self._run_command_without_raising(['/usr/bin/tmsh', 'delete', 'auth', 'user', username])
def get_dvd_device(self, dev_dir='/dev'):
"""Find BIG-IP's CD/DVD device
This device is almost certainly /dev/cdrom so I added the ? to this pattern.
Note that this method will return upon the first device found, but in my
tests with 12.1.1 it will also find /dev/sr0 on occasion. This is NOT the
correct CD/DVD device though.
:todo: Consider just always returning "/dev/cdrom" here if that device
exists on all platforms that are supported on Azure(Stack)
:param dev_dir: The root directory from which to look for devices
"""
pattern = r'(sr[0-9]|hd[c-z]|cdrom[0-9]?)'
for dvd in [re.match(pattern, dev) for dev in os.listdir(dev_dir)]:
if dvd is not None:
return "/dev/{0}".format(dvd.group(0))
raise OSUtilError("Failed to get dvd device")
# The linter reports that this function's arguments differ from those
# of the function this overrides. This doesn't seem to be a problem, however,
# because this function accepts any option that could've been specified for
# the original (and, by forwarding the kwargs to the original, will reject any
# option _not_ accepted by the original). Additionally, this method allows us
# to keep the defaults for mount_dvd in one place (the original function) instead
# of having to duplicate it here as well.
def mount_dvd(self, **kwargs): # pylint: disable=W0221
"""Mount the DVD containing the provisioningiso.iso file
This is the _first_ hook that WAAgent provides for us, so this is the
point where we should wait for mcpd to load. I am just overloading
this method to add the mcpd wait. Then I proceed with the stock code.
:param max_retry: Maximum number of retries waagent will make when
mounting the provisioningiso.iso DVD
:param chk_err: Whether to check for errors or not in the mounting
commands
"""
self._wait_until_mcpd_is_initialized()
return super(BigIpOSUtil, self).mount_dvd(**kwargs)
def eject_dvd(self, chk_err=True):
"""Runs the eject command to eject the provisioning DVD
BIG-IP does not include an eject command. It is sufficient to just
umount the DVD disk. But I will log that we do not support this for
future reference.
:param chk_err: Whether or not to check for errors raised by the eject
command
"""
logger.warn("Eject is not supported on this platform")
def get_first_if(self):
"""Return the interface name, and ip addr of the management interface.
We need to add a struct_size check here because, curiously, our 64bit
platform is identified by python in Azure(Stack) as 32 bit and without
adjusting the struct_size, we can't get the information we need.
I believe this may be caused by BIG-IP shipping only an i686 python
instead of an x86_64 build.
"""
iface = ''
expected = 16 # how many devices should I expect...
python_arc = platform.architecture()[0]
if python_arc == '64bit':
struct_size = 40 # for 64bit the size is 40 bytes
else:
struct_size = 32 # for 32bit the size is 32 bytes
sock = socket.socket(socket.AF_INET,
socket.SOCK_DGRAM,
socket.IPPROTO_UDP)
buff = array.array('B', b'\0' * (expected * struct_size))
param = struct.pack('iL',
expected*struct_size,
buff.buffer_info()[0])
ret = fcntl.ioctl(sock.fileno(), 0x8912, param)
retsize = (struct.unpack('iL', ret)[0])
if retsize == (expected * struct_size):
logger.warn(('SIOCGIFCONF returned more than {0} up '
'network interfaces.'), expected)
sock = array_to_bytes(buff)
for i in range(0, struct_size * expected, struct_size):
iface = self._format_single_interface_name(sock, i)
# Azure public was returning "lo:1" when deploying WAF
if b'lo' in iface:
continue
else:
break
return iface.decode('latin-1'), socket.inet_ntoa(sock[i+20:i+24]) # pylint: disable=undefined-loop-variable
def _format_single_interface_name(self, sock, offset):
return sock[offset:offset+16].split(b'\0', 1)[0]
def route_add(self, net, mask, gateway):
"""Add specified route using tmsh.
:param net:
:param mask:
:param gateway:
:return:
"""
cmd = ("/usr/bin/tmsh create net route "
"{0}/{1} gw {2}").format(net, mask, gateway)
return shellutil.run(cmd, chk_err=False)
def device_for_ide_port(self, port_id):
"""Return device name attached to ide port 'n'.
Include a wait in here because BIG-IP may not have yet initialized
this list of devices.
:param port_id:
:return:
"""
for retries in range(1, 100): # pylint: disable=W0612
# Retry until devices are ready
if os.path.exists("/sys/bus/vmbus/devices/"):
break
else:
time.sleep(10)
return super(BigIpOSUtil, self).device_for_ide_port(port_id)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/clearlinux.py 0000664 0000000 0000000 00000007535 14626177470 0026316 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os # pylint: disable=W0611
import re # pylint: disable=W0611
import pwd # pylint: disable=W0611
import shutil # pylint: disable=W0611
import socket # pylint: disable=W0611
import array # pylint: disable=W0611
import struct # pylint: disable=W0611
import fcntl # pylint: disable=W0611
import time # pylint: disable=W0611
import base64 # pylint: disable=W0611
import errno
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger # pylint: disable=W0611
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.utils.textutil as textutil # pylint: disable=W0611
from azurelinuxagent.common.osutil.default import DefaultOSUtil
from azurelinuxagent.common.exception import OSUtilError
class ClearLinuxUtil(DefaultOSUtil):
def __init__(self):
super(ClearLinuxUtil, self).__init__()
self.agent_conf_file_path = '/usr/share/defaults/waagent/waagent.conf'
self.jit_enabled = True
@staticmethod
def get_systemd_unit_file_install_path():
return "/usr/lib/systemd/system"
@staticmethod
def get_agent_bin_path():
return "/usr/bin"
def is_dhcp_enabled(self):
return True
def start_network(self) :
return shellutil.run("systemctl start systemd-networkd", chk_err=False)
def restart_if(self, ifname=None, retries=None, wait=None):
shellutil.run("systemctl restart systemd-networkd")
def restart_ssh_service(self):
# SSH is socket activated. No need to restart it.
pass
def stop_dhcp_service(self):
return shellutil.run("systemctl stop systemd-networkd", chk_err=False)
def start_dhcp_service(self):
return shellutil.run("systemctl start systemd-networkd", chk_err=False)
def start_agent_service(self):
return shellutil.run("systemctl start {0}".format(self.service_name), chk_err=False)
def stop_agent_service(self):
return shellutil.run("systemctl stop {0}".format(self.service_name), chk_err=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "systemd-networkd"])
def conf_sshd(self, disable_password):
# Don't whack the system default sshd conf
pass
def del_root_password(self):
try:
passwd_file_path = conf.get_passwd_file_path()
try:
passwd_content = fileutil.read_file(passwd_file_path)
if not passwd_content:
# Empty file is no better than no file
raise IOError(errno.ENOENT, "Empty File", passwd_file_path)
except (IOError, OSError) as file_read_err:
if file_read_err.errno != errno.ENOENT:
raise
new_passwd = ["root:*LOCK*:14600::::::"]
else:
passwd = passwd_content.split('\n')
new_passwd = [x for x in passwd if not x.startswith("root:")]
new_passwd.insert(0, "root:*LOCK*:14600::::::")
fileutil.write_file(passwd_file_path, "\n".join(new_passwd))
except IOError as e:
raise OSUtilError("Failed to delete root password:{0}".format(e))
pass # pylint: disable=W0107
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/coreos.py 0000664 0000000 0000000 00000005720 14626177470 0025434 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class CoreOSUtil(DefaultOSUtil):
def __init__(self):
super(CoreOSUtil, self).__init__()
self.agent_conf_file_path = '/usr/share/oem/waagent.conf'
self.waagent_path = '/usr/share/oem/bin/waagent'
self.python_path = '/usr/share/oem/python/bin'
self.jit_enabled = True
if 'PATH' in os.environ:
path = "{0}:{1}".format(os.environ['PATH'], self.python_path)
else:
path = self.python_path
os.environ['PATH'] = path
if 'PYTHONPATH' in os.environ:
py_path = os.environ['PYTHONPATH']
py_path = "{0}:{1}".format(py_path, self.waagent_path)
else:
py_path = self.waagent_path
os.environ['PYTHONPATH'] = py_path
@staticmethod
def get_agent_bin_path():
return "/usr/share/oem/bin"
def is_sys_user(self, username):
# User 'core' is not a sysuser.
if username == 'core':
return False
return super(CoreOSUtil, self).is_sys_user(username)
def is_dhcp_enabled(self):
return True
def start_network(self):
return shellutil.run("systemctl start systemd-networkd", chk_err=False)
def restart_if(self, ifname=None, retries=None, wait=None):
shellutil.run("systemctl restart systemd-networkd")
def restart_ssh_service(self):
# SSH is socket activated on CoreOS. No need to restart it.
pass
def stop_dhcp_service(self):
return shellutil.run("systemctl stop systemd-networkd", chk_err=False)
def start_dhcp_service(self):
return shellutil.run("systemctl start systemd-networkd", chk_err=False)
def start_agent_service(self):
return shellutil.run("systemctl start {0}".format(self.service_name), chk_err=False)
def stop_agent_service(self):
return shellutil.run("systemctl stop {0}".format(self.service_name), chk_err=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(
["systemctl", "show", "-p", "MainPID", "systemd-networkd"],
transform_command_output=lambda o: o.replace("MainPID=", ""))
def conf_sshd(self, disable_password):
# In CoreOS, /etc/sshd_config is mounted read-only. Skip the setting.
pass
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/debian.py 0000664 0000000 0000000 00000005243 14626177470 0025364 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os # pylint: disable=W0611
import re # pylint: disable=W0611
import pwd # pylint: disable=W0611
import shutil # pylint: disable=W0611
import socket # pylint: disable=W0611
import array # pylint: disable=W0611
import struct # pylint: disable=W0611
import fcntl # pylint: disable=W0611
import time # pylint: disable=W0611
import base64 # pylint: disable=W0611
import azurelinuxagent.common.logger as logger # pylint: disable=W0611
import azurelinuxagent.common.utils.fileutil as fileutil # pylint: disable=W0611
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.utils.textutil as textutil # pylint: disable=W0611
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class DebianOSBaseUtil(DefaultOSUtil):
def __init__(self):
super(DebianOSBaseUtil, self).__init__()
self.jit_enabled = True
def restart_ssh_service(self):
return shellutil.run("systemctl --job-mode=ignore-dependencies try-reload-or-restart ssh", chk_err=False)
def stop_agent_service(self):
return shellutil.run("service azurelinuxagent stop", chk_err=False)
def start_agent_service(self):
return shellutil.run("service azurelinuxagent start", chk_err=False)
def start_network(self):
pass
def remove_rules_files(self, rules_files=""):
pass
def restore_rules_files(self, rules_files=""):
pass
def get_dhcp_lease_endpoint(self):
return self.get_endpoint_from_leases_path('/var/lib/dhcp/dhclient.*.leases')
class DebianOSModernUtil(DebianOSBaseUtil):
def __init__(self):
super(DebianOSModernUtil, self).__init__()
self.jit_enabled = True
self.service_name = self.get_service_name()
@staticmethod
def get_service_name():
return "walinuxagent"
def stop_agent_service(self):
return shellutil.run("systemctl stop {0}".format(self.service_name), chk_err=False)
def start_agent_service(self):
return shellutil.run("systemctl start {0}".format(self.service_name), chk_err=False)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/default.py 0000664 0000000 0000000 00000172366 14626177470 0025601 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import base64
import datetime
import errno
import fcntl
import glob
import json
import multiprocessing
import os
import platform
import pwd
import re
import shutil
import socket
import struct
import sys
import time
from pwd import getpwall
import array
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.exception import OSUtilError
from azurelinuxagent.common.future import ustr, array_to_bytes
from azurelinuxagent.common.utils.cryptutil import CryptUtil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.utils.networkutil import RouteEntry, NetworkInterfaceCard, AddFirewallRules
from azurelinuxagent.common.utils.shellutil import CommandError
__RULES_FILES__ = ["/lib/udev/rules.d/75-persistent-net-generator.rules",
"/etc/udev/rules.d/70-persistent-net.rules"]
"""
Define distro specific behavior. OSUtil class defines default behavior
for all distros. Each concrete distro class can override the default
behavior if needed.
"""
_IPTABLES_VERSION_PATTERN = re.compile("^[^\d\.]*([\d\.]+).*$") # pylint: disable=W1401
_IPTABLES_LOCKING_VERSION = FlexibleVersion('1.4.21')
def _add_wait(wait, command):
"""
If 'wait' is True, adds the wait option (-w) to the given iptables command line
"""
if wait:
command.insert(1, "-w")
return command
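# For example, _add_wait(True, ["iptables", "-t", "security", "-L"]) yields
# ["iptables", "-w", "-t", "security", "-L"]; with wait=False the command
# is returned unchanged.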
def get_iptables_version_command():
return ["iptables", "--version"]
def get_firewall_list_command(wait):
return _add_wait(wait, ["iptables", "-t", "security", "-L", "-nxv"])
def get_firewall_packets_command(wait):
return _add_wait(wait, ["iptables", "-t", "security", "-L", "OUTPUT", "--zero", "OUTPUT", "-nxv"])
# Precisely delete the rules created by the agent.
# This rule was used in versions <= 2.2.25; it helped to validate our change and determine its impact.
def get_firewall_delete_conntrack_accept_command(wait, destination):
return _add_wait(wait,
["iptables", "-t", "security", AddFirewallRules.DELETE_COMMAND, "OUTPUT", "-d", destination, "-p", "tcp", "-m", "conntrack",
"--ctstate", "INVALID,NEW", "-j", "ACCEPT"])
def get_delete_accept_tcp_rule(wait, destination):
return AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.DELETE_COMMAND, destination, wait=wait)
def get_firewall_delete_owner_accept_command(wait, destination, owner_uid):
return _add_wait(wait, ["iptables", "-t", "security", AddFirewallRules.DELETE_COMMAND, "OUTPUT", "-d", destination, "-p", "tcp", "-m", "owner",
"--uid-owner", str(owner_uid), "-j", "ACCEPT"])
def get_firewall_delete_conntrack_drop_command(wait, destination):
return _add_wait(wait,
["iptables", "-t", "security", AddFirewallRules.DELETE_COMMAND, "OUTPUT", "-d", destination, "-p", "tcp", "-m", "conntrack",
"--ctstate", "INVALID,NEW", "-j", "DROP"])
PACKET_PATTERN = "^\s*(\d+)\s+(\d+)\s+DROP\s+.*{0}[^\d]*$" # pylint: disable=W1401
ALL_CPUS_REGEX = re.compile('^cpu .*')
ALL_MEMS_REGEX = re.compile('^Mem.*')
_enable_firewall = True
DMIDECODE_CMD = 'dmidecode --string system-uuid'
PRODUCT_ID_FILE = '/sys/class/dmi/id/product_uuid'
UUID_PATTERN = re.compile(
r'^\s*[A-F0-9]{8}(?:\-[A-F0-9]{4}){3}\-[A-F0-9]{12}\s*$',
re.IGNORECASE)
IOCTL_SIOCGIFCONF = 0x8912
IOCTL_SIOCGIFFLAGS = 0x8913
IOCTL_SIOCGIFHWADDR = 0x8927
IFNAMSIZ = 16
IP_COMMAND_OUTPUT = re.compile('^\d+:\s+(\w+):\s+(.*)$') # pylint: disable=W1401
STORAGE_DEVICE_PATH = '/sys/bus/vmbus/devices/'
GEN2_DEVICE_ID = 'f8b3781a-1e82-4818-a1c3-63d806ec15bb'
class DefaultOSUtil(object):
def __init__(self):
self.agent_conf_file_path = '/etc/waagent.conf'
self.selinux = None
self.disable_route_warning = False
self.jit_enabled = False
self.service_name = self.get_service_name()
@staticmethod
def get_service_name():
return "waagent"
@staticmethod
def get_systemd_unit_file_install_path():
return "/lib/systemd/system"
@staticmethod
def get_agent_bin_path():
return "/usr/sbin"
@staticmethod
def get_vm_arch():
try:
return platform.machine()
except Exception as e:
logger.warn("Unable to determine cpu architecture: {0}", ustr(e))
return "unknown"
def get_firewall_dropped_packets(self, dst_ip=None):
# If a previous attempt failed, do not retry
global _enable_firewall # pylint: disable=W0603
if not _enable_firewall:
return 0
try:
wait = self.get_firewall_will_wait()
try:
output = shellutil.run_command(get_firewall_packets_command(wait))
pattern = re.compile(PACKET_PATTERN.format(dst_ip))
for line in output.split('\n'):
m = pattern.match(line)
if m is not None:
return int(m.group(1))
except Exception as e:
if isinstance(e, CommandError) and (e.returncode == 3 or e.returncode == 4): # pylint: disable=E1101
# Return code 3 is a transient error that we ignore. This code fires every loop
# of the daemon (60m), so we will get the value eventually.
# Return code 4 is ignored as a temporary fix (RULE_REPLACE failed (Invalid argument)).
return 0
logger.warn("Failed to get firewall packets: {0}", ustr(e))
return -1
return 0
except Exception as e:
_enable_firewall = False
logger.warn("Unable to retrieve firewall packets dropped"
"{0}".format(ustr(e)))
return -1
def get_firewall_will_wait(self):
# Determine if iptables will serialize access
try:
output = shellutil.run_command(get_iptables_version_command())
except Exception as e:
msg = "Unable to determine version of iptables: {0}".format(ustr(e))
logger.warn(msg)
raise Exception(msg)
m = _IPTABLES_VERSION_PATTERN.match(output)
if m is None:
msg = "iptables did not return version information: {0}".format(output)
logger.warn(msg)
raise Exception(msg)
wait = "-w" \
if FlexibleVersion(m.group(1)) >= _IPTABLES_LOCKING_VERSION \
else ""
return wait
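# For example, "iptables v1.8.4 (nf_tables)" matches the version pattern
# with group(1) == "1.8.4"; since 1.8.4 >= 1.4.21 this returns "-w".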
def _delete_rule(self, rule):
"""
Continually execute the delete operation until the return
code is non-zero or the limit has been reached.
"""
for i in range(1, 100): # pylint: disable=W0612
try:
rc = shellutil.run_command(rule) # pylint: disable=W0612
except CommandError as e:
if e.returncode == 1:
return
if e.returncode == 2:
raise Exception("invalid firewall deletion rule '{0}'".format(rule))
def remove_firewall(self, dst_ip, uid, wait):
# If a previous attempt failed, do not retry
global _enable_firewall # pylint: disable=W0603
if not _enable_firewall:
return False
try:
# This rule was <= 2.2.25 only, and may still exist on some VMs. Until 2.2.25
# has aged out, keep this cleanup in place.
self._delete_rule(get_firewall_delete_conntrack_accept_command(wait, dst_ip))
self._delete_rule(get_delete_accept_tcp_rule(wait, dst_ip))
self._delete_rule(get_firewall_delete_owner_accept_command(wait, dst_ip, uid))
self._delete_rule(get_firewall_delete_conntrack_drop_command(wait, dst_ip))
return True
except Exception as e:
_enable_firewall = False
logger.info("Unable to remove firewall -- "
"no further attempts will be made: "
"{0}".format(ustr(e)))
return False
def remove_legacy_firewall_rule(self, dst_ip):
# This function removes the legacy firewall rule that was added <= 2.2.25.
# Not adding the global _enable_firewall check here as this will only be called once per service start and
# we dont want the state of this call to affect other iptable calls.
try:
wait = self.get_firewall_will_wait()
# This rule was <= 2.2.25 only, and may still exist on some VMs. Until 2.2.25
# has aged out, keep this cleanup in place.
self._delete_rule(get_firewall_delete_conntrack_accept_command(wait, dst_ip))
except Exception as error:
logger.info(
"Unable to remove legacy firewall rule, won't try removing it again. Error: {0}".format(ustr(error)))
def enable_firewall(self, dst_ip, uid):
"""
Checks whether every iptables rule exists and adds any that are missing. Returns a tuple (enable firewall success status, missing rules array).
enable firewall success status: True if every firewall rule exists, otherwise False
missing rules: array with the names of the missing rules ("ACCEPT DNS", "ACCEPT", "DROP")
"""
# If a previous attempt failed, do not retry
global _enable_firewall # pylint: disable=W0603
if not _enable_firewall:
return False, []
missing_rules = []
try:
wait = self.get_firewall_will_wait()
# Check every iptables rule; if any rule is missing, delete the existing
# rules and append every iptables rule to the end of the chain.
try:
missing_rules.extend(AddFirewallRules.get_missing_iptables_rules(wait, dst_ip, uid))
if len(missing_rules) > 0:
self.remove_firewall(dst_ip, uid, wait)
AddFirewallRules.add_iptables_rules(wait, dst_ip, uid)
except CommandError as e:
if e.returncode == 2:
self.remove_firewall(dst_ip, uid, wait)
msg = "please upgrade iptables to a version that supports the -C option"
logger.warn(msg)
raise
except Exception as error:
logger.warn(ustr(error))
raise
return True, missing_rules
except Exception as e:
_enable_firewall = False
logger.info("Unable to establish firewall -- "
"no further attempts will be made: "
"{0}".format(ustr(e)))
return False, missing_rules
def get_firewall_list(self, wait=None):
try:
if wait is None:
wait = self.get_firewall_will_wait()
output = shellutil.run_command(get_firewall_list_command(wait))
return output
except Exception as e:
logger.warn("Listing firewall rules failed: {0}".format(ustr(e)))
return ""
@staticmethod
def _correct_instance_id(instance_id):
"""
Azure stores the instance ID with an incorrect byte ordering for the
first parts. For example, the ID returned by the metadata service:
D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8
will be found as:
544CDFD0-CB4E-4B4A-9954-5BDF3ED5C3B8
This code corrects the byte order such that it is consistent with
that returned by the metadata service.
"""
if not UUID_PATTERN.match(instance_id):
return instance_id
parts = instance_id.split('-')
return '-'.join([
textutil.swap_hexstring(parts[0], width=2),
textutil.swap_hexstring(parts[1], width=2),
textutil.swap_hexstring(parts[2], width=2),
parts[3],
parts[4]
])
def is_current_instance_id(self, id_that):
"""
Compare two instance IDs for equality, but allow that some IDs
may have been persisted using the incorrect byte ordering.
"""
id_this = self.get_instance_id()
logger.verbose("current instance id: {0}".format(id_this))
logger.verbose(" former instance id: {0}".format(id_that))
return id_this.lower() == id_that.lower() or \
id_this.lower() == self._correct_instance_id(id_that).lower()
def get_agent_conf_file_path(self):
return self.agent_conf_file_path
def get_instance_id(self):
"""
Azure records a UUID as the instance ID
First check /sys/class/dmi/id/product_uuid.
If that is missing, extract it from the dmidecode output.
If nothing works (for old VMs), return the empty string.
"""
if os.path.isfile(PRODUCT_ID_FILE):
s = fileutil.read_file(PRODUCT_ID_FILE).strip()
else:
rc, s = shellutil.run_get_output(DMIDECODE_CMD)
if rc != 0 or UUID_PATTERN.match(s) is None:
return ""
return self._correct_instance_id(s.strip())
@staticmethod
def get_userentry(username):
try:
return pwd.getpwnam(username)
except KeyError:
return None
def get_root_username(self):
return "root"
def is_sys_user(self, username):
"""
Check whether the user is a system user.
If reset sys user is allowed in conf, return False
Otherwise, check whether UID is less than UID_MIN
"""
if conf.get_allow_reset_sys_user():
return False
userentry = self.get_userentry(username)
uidmin = None
try:
uidmin_def = fileutil.get_line_startingwith("UID_MIN",
"/etc/login.defs")
if uidmin_def is not None:
uidmin = int(uidmin_def.split()[1])
except IOError as e: # pylint: disable=W0612
pass
if uidmin is None:
uidmin = 100
return userentry is not None and userentry[2] < uidmin
def useradd(self, username, expiration=None, comment=None):
"""
Create user account with 'username'
"""
userentry = self.get_userentry(username)
if userentry is not None:
logger.info("User {0} already exists, skip useradd", username)
return
if expiration is not None:
cmd = ["useradd", "-m", username, "-e", expiration]
else:
cmd = ["useradd", "-m", username]
if comment is not None:
cmd.extend(["-c", comment])
self._run_command_raising_OSUtilError(cmd, err_msg="Failed to create user account:{0}".format(username))
def chpasswd(self, username, password, crypt_id=6, salt_len=10):
if self.is_sys_user(username):
raise OSUtilError(("User {0} is a system user, "
"will not set password.").format(username))
passwd_hash = textutil.gen_password_hash(password, crypt_id, salt_len)
self._run_command_raising_OSUtilError(["usermod", "-p", passwd_hash, username],
err_msg="Failed to set password for {0}".format(username))
def get_users(self):
return getpwall()
def conf_sudoer(self, username, nopasswd=False, remove=False):
sudoers_dir = conf.get_sudoers_dir()
sudoers_wagent = os.path.join(sudoers_dir, 'waagent')
if not remove:
# for older distros create sudoers.d
if not os.path.isdir(sudoers_dir):
# create the sudoers.d directory
fileutil.mkdir(sudoers_dir)
# add the include of sudoers.d to the /etc/sudoers
sudoers_file = os.path.join(sudoers_dir, os.pardir, 'sudoers')
include_sudoers_dir = "\n#includedir {0}\n".format(sudoers_dir)
fileutil.append_file(sudoers_file, include_sudoers_dir)
sudoer = None
if nopasswd:
sudoer = "{0} ALL=(ALL) NOPASSWD: ALL".format(username)
else:
sudoer = "{0} ALL=(ALL) ALL".format(username)
if not os.path.isfile(sudoers_wagent) or \
fileutil.findstr_in_file(sudoers_wagent, sudoer) is False:
fileutil.append_file(sudoers_wagent, "{0}\n".format(sudoer))
fileutil.chmod(sudoers_wagent, 0o440)
else:
# remove user from sudoers
if os.path.isfile(sudoers_wagent):
try:
content = fileutil.read_file(sudoers_wagent)
sudoers = content.split("\n")
sudoers = [x for x in sudoers if username not in x]
fileutil.write_file(sudoers_wagent, "\n".join(sudoers))
except IOError as e:
raise OSUtilError("Failed to remove sudoer: {0}".format(e))
def del_root_password(self):
try:
passwd_file_path = conf.get_passwd_file_path()
passwd_content = fileutil.read_file(passwd_file_path)
passwd = passwd_content.split('\n')
new_passwd = [x for x in passwd if not x.startswith("root:")]
new_passwd.insert(0, "root:*LOCK*:14600::::::")
fileutil.write_file(passwd_file_path, "\n".join(new_passwd))
except IOError as e:
raise OSUtilError("Failed to delete root password:{0}".format(e))
@staticmethod
def _norm_path(filepath):
home = conf.get_home_dir()
# Expand HOME variable if present in path
path = os.path.normpath(filepath.replace("$HOME", home))
return path
def deploy_ssh_keypair(self, username, keypair):
"""
Deploy id_rsa and id_rsa.pub
"""
path, thumbprint = keypair
path = self._norm_path(path)
dir_path = os.path.dirname(path)
fileutil.mkdir(dir_path, mode=0o700, owner=username)
lib_dir = conf.get_lib_dir()
prv_path = os.path.join(lib_dir, thumbprint + '.prv')
if not os.path.isfile(prv_path):
raise OSUtilError("Can't find {0}.prv".format(thumbprint))
shutil.copyfile(prv_path, path)
pub_path = path + '.pub'
crytputil = CryptUtil(conf.get_openssl_cmd())
pub = crytputil.get_pubkey_from_prv(prv_path)
fileutil.write_file(pub_path, pub)
self.set_selinux_context(pub_path, 'unconfined_u:object_r:ssh_home_t:s0')
self.set_selinux_context(path, 'unconfined_u:object_r:ssh_home_t:s0')
os.chmod(path, 0o644)
os.chmod(pub_path, 0o600)
def openssl_to_openssh(self, input_file, output_file):
cryptutil = CryptUtil(conf.get_openssl_cmd())
cryptutil.crt_to_ssh(input_file, output_file)
def deploy_ssh_pubkey(self, username, pubkey):
"""
Deploy authorized_key
"""
path, thumbprint, value = pubkey
if path is None:
raise OSUtilError("Public key path is None")
crytputil = CryptUtil(conf.get_openssl_cmd())
path = self._norm_path(path)
dir_path = os.path.dirname(path)
fileutil.mkdir(dir_path, mode=0o700, owner=username)
if value is not None:
if not value.startswith("ssh-"):
raise OSUtilError("Bad public key: {0}".format(value))
if not value.endswith("\n"):
value += "\n"
fileutil.write_file(path, value)
elif thumbprint is not None:
lib_dir = conf.get_lib_dir()
crt_path = os.path.join(lib_dir, thumbprint + '.crt')
if not os.path.isfile(crt_path):
raise OSUtilError("Can't find {0}.crt".format(thumbprint))
pub_path = os.path.join(lib_dir, thumbprint + '.pub')
pub = crytputil.get_pubkey_from_crt(crt_path)
fileutil.write_file(pub_path, pub)
self.set_selinux_context(pub_path,
'unconfined_u:object_r:ssh_home_t:s0')
self.openssl_to_openssh(pub_path, path)
fileutil.chmod(pub_path, 0o600)
else:
raise OSUtilError("SSH public key Fingerprint and Value are None")
self.set_selinux_context(path, 'unconfined_u:object_r:ssh_home_t:s0')
fileutil.chowner(path, username)
fileutil.chmod(path, 0o644)
def is_selinux_system(self):
"""
Checks and sets self.selinux = True if SELinux is available on system.
"""
if self.selinux is None:
if shellutil.run("which getenforce", chk_err=False) == 0:
self.selinux = True
else:
self.selinux = False
return self.selinux
def is_selinux_enforcing(self):
"""
Calls shell command 'getenforce' and returns True if 'Enforcing'.
"""
if self.is_selinux_system():
output = shellutil.run_get_output("getenforce")[1]
return output.startswith("Enforcing")
else:
return False
def set_selinux_context(self, path, con): # pylint: disable=R1710
"""
Calls shell 'chcon' with 'path' and 'con' context.
Returns exit result.
"""
if self.is_selinux_system():
if not os.path.exists(path):
logger.error("Path does not exist: {0}".format(path))
return 1
try:
shellutil.run_command(['chcon', con, path], log_error=True)
except shellutil.CommandError as cmd_err:
return cmd_err.returncode
return 0
def conf_sshd(self, disable_password):
option = "no" if disable_password else "yes"
conf_file_path = conf.get_sshd_conf_file_path()
conf_file = fileutil.read_file(conf_file_path).split("\n")
textutil.set_ssh_config(conf_file, "PasswordAuthentication", option)
textutil.set_ssh_config(conf_file, "ChallengeResponseAuthentication", option)
textutil.set_ssh_config(conf_file, "ClientAliveInterval", str(conf.get_ssh_client_alive_interval()))
fileutil.write_file(conf_file_path, "\n".join(conf_file))
logger.info("{0} SSH password-based authentication methods."
.format("Disabled" if disable_password else "Enabled"))
logger.info("Configured SSH client probing to keep connections alive.")
def get_dvd_device(self, dev_dir='/dev'):
pattern = r'(sr[0-9]|hd[c-z]|cdrom[0-9]|cd[0-9]|vd[b-z])'
device_list = os.listdir(dev_dir)
for dvd in [re.match(pattern, dev) for dev in device_list]:
if dvd is not None:
return "/dev/{0}".format(dvd.group(0))
inner_detail = "The following devices were found, but none matched " \
"the pattern [{0}]: {1}\n".format(pattern, device_list)
raise OSUtilError(msg="Failed to get dvd device from {0}".format(dev_dir),
inner=inner_detail)
def mount_dvd(self,
max_retry=6,
chk_err=True,
dvd_device=None,
mount_point=None,
sleep_time=5):
if dvd_device is None:
dvd_device = self.get_dvd_device()
if mount_point is None:
mount_point = conf.get_dvd_mount_point()
mount_list = shellutil.run_get_output("mount")[1]
existing = self.get_mount_point(mount_list, dvd_device)
if existing is not None:
# already mounted
logger.info("{0} is already mounted at {1}", dvd_device, existing)
return
if not os.path.isdir(mount_point):
os.makedirs(mount_point)
err = ''
for retry in range(1, max_retry):
return_code, err = self.mount(dvd_device,
mount_point,
option=["-o", "ro", "-t", "udf,iso9660,vfat"],
chk_err=False)
if return_code == 0:
logger.info("Successfully mounted dvd")
return
else:
logger.warn(
"Mounting dvd failed [retry {0}/{1}, sleeping {2} sec]",
retry,
max_retry - 1,
sleep_time)
if retry < max_retry:
time.sleep(sleep_time)
if chk_err:
raise OSUtilError("Failed to mount dvd device", inner=err)
def umount_dvd(self, chk_err=True, mount_point=None):
if mount_point is None:
mount_point = conf.get_dvd_mount_point()
return_code = self.umount(mount_point, chk_err=chk_err)
if chk_err and return_code != 0:
raise OSUtilError("Failed to unmount dvd device at {0}".format(mount_point))
def eject_dvd(self, chk_err=True):
dvd = self.get_dvd_device()
dev = dvd.rsplit('/', 1)[1]
pattern = r'(vd[b-z])'
# We should not eject if the disk is not a cdrom
if re.search(pattern, dev):
return
try:
shellutil.run_command(["eject", dvd])
except shellutil.CommandError as cmd_err:
if chk_err:
msg = "Failed to eject dvd: ret={0}\n[stdout]\n{1}\n\n[stderr]\n{2}"\
.format(cmd_err.returncode, cmd_err.stdout, cmd_err.stderr)
raise OSUtilError(msg)
def try_load_atapiix_mod(self):
try:
self.load_atapiix_mod()
except Exception as e:
logger.warn("Could not load ATAPI driver: {0}".format(e))
def load_atapiix_mod(self):
if self.is_atapiix_mod_loaded():
return
ret, kern_version = shellutil.run_get_output("uname -r")
if ret != 0:
raise Exception("Failed to call uname -r")
mod_path = os.path.join('/lib/modules',
kern_version.strip('\n'),
'kernel/drivers/ata/ata_piix.ko')
if not os.path.isfile(mod_path):
raise Exception("Can't find module file:{0}".format(mod_path))
ret, output = shellutil.run_get_output("insmod " + mod_path) # pylint: disable=W0612
if ret != 0:
raise Exception("Error calling insmod for ATAPI CD-ROM driver")
if not self.is_atapiix_mod_loaded(max_retry=3):
raise Exception("Failed to load ATAPI CD-ROM driver")
def is_atapiix_mod_loaded(self, max_retry=1):
for retry in range(0, max_retry):
ret = shellutil.run("lsmod | grep ata_piix", chk_err=False)
if ret == 0:
logger.info("Module driver for ATAPI CD-ROM is already present.")
return True
if retry < max_retry - 1:
time.sleep(1)
return False
def mount(self, device, mount_point, option=None, chk_err=True):
if not option:
option = []
cmd = ["mount"]
cmd.extend(option + [device, mount_point])
try:
output = shellutil.run_command(cmd, log_error=chk_err)
except shellutil.CommandError as cmd_err:
detail = "[{0}] returned {1}:\n stdout: {2}\n\nstderr: {3}".format(cmd, cmd_err.returncode,
cmd_err.stdout, cmd_err.stderr)
return cmd_err.returncode, detail
return 0, output
def umount(self, mount_point, chk_err=True):
try:
shellutil.run_command(["umount", mount_point], log_error=chk_err)
except shellutil.CommandError as cmd_err:
return cmd_err.returncode
return 0
def allow_dhcp_broadcast(self):
# Open DHCP port if iptables is enabled.
# We suppress error logging.
shellutil.run("iptables -D INPUT -p udp --dport 68 -j ACCEPT",
chk_err=False)
shellutil.run("iptables -I INPUT -p udp --dport 68 -j ACCEPT",
chk_err=False)
def remove_rules_files(self, rules_files=None):
if rules_files is None:
rules_files = __RULES_FILES__
lib_dir = conf.get_lib_dir()
for src in rules_files:
file_name = fileutil.base_name(src)
dest = os.path.join(lib_dir, file_name)
if os.path.isfile(dest):
os.remove(dest)
if os.path.isfile(src):
logger.warn("Move rules file {0} to {1}", file_name, dest)
shutil.move(src, dest)
def restore_rules_files(self, rules_files=None):
if rules_files is None:
rules_files = __RULES_FILES__
lib_dir = conf.get_lib_dir()
for dest in rules_files:
filename = fileutil.base_name(dest)
src = os.path.join(lib_dir, filename)
if os.path.isfile(dest):
continue
if os.path.isfile(src):
logger.warn("Move rules file {0} to {1}", filename, dest)
shutil.move(src, dest)
def get_mac_addr(self):
"""
Convenience function, returns mac addr bound to
first non-loopback interface.
"""
ifname = self.get_if_name()
addr = self.get_if_mac(ifname)
return textutil.hexstr_to_bytearray(addr)
def get_if_mac(self, ifname):
"""
Return the mac-address bound to the socket.
"""
sock = socket.socket(socket.AF_INET,
socket.SOCK_DGRAM,
socket.IPPROTO_UDP)
param = struct.pack('256s', (ifname[:15] + ('\0' * 241)).encode('latin-1'))
info = fcntl.ioctl(sock.fileno(), IOCTL_SIOCGIFHWADDR, param)
sock.close()
return ''.join(['%02X' % textutil.str_to_ord(char) for char in info[18:24]])
@staticmethod
def _get_struct_ifconf_size():
"""
Return the size of struct ifreq, as used by the SIOCGIFCONF ioctl. On
64-bit platforms the size is 40 bytes; on 32-bit platforms it is 32 bytes.
"""
python_arc = platform.architecture()[0]
struct_size = 32 if python_arc == '32bit' else 40
return struct_size
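# Each struct ifreq record returned by SIOCGIFCONF starts with the
# IFNAMSIZ (16) byte interface name followed by a sockaddr; for AF_INET
# the 4 address bytes sit at record offsets 20..24 (2 bytes family,
# 2 bytes port, then the address), which is why the code below slices
# the buffer at [i + 20:i + 24].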
def _get_all_interfaces(self):
"""
Return a dictionary mapping from interface name to IPv4 address.
Interfaces without a name are ignored.
"""
expected = 16 # how many devices should I expect...
struct_size = DefaultOSUtil._get_struct_ifconf_size()
array_size = expected * struct_size
buff = array.array('B', b'\0' * array_size)
param = struct.pack('iL', array_size, buff.buffer_info()[0])
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
ret = fcntl.ioctl(sock.fileno(), IOCTL_SIOCGIFCONF, param)
retsize = (struct.unpack('iL', ret)[0])
sock.close()
if retsize == array_size:
logger.warn(('SIOCGIFCONF returned more than {0} up '
'network interfaces.'), expected)
ifconf_buff = array_to_bytes(buff)
ifaces = {}
for i in range(0, array_size, struct_size):
iface = ifconf_buff[i:i + IFNAMSIZ].split(b'\0', 1)[0]
if len(iface) > 0:
iface_name = iface.decode('latin-1')
if iface_name not in ifaces:
ifaces[iface_name] = socket.inet_ntoa(ifconf_buff[i + 20:i + 24])
return ifaces
def get_first_if(self):
"""
Return the interface name, and IPv4 addr of the "primary" interface or,
failing that, any active non-loopback interface.
"""
primary = self.get_primary_interface()
ifaces = self._get_all_interfaces()
if primary in ifaces:
return primary, ifaces[primary]
for iface_name in ifaces.keys():
if not self.is_loopback(iface_name):
logger.info("Choosing non-primary [{0}]".format(iface_name))
return iface_name, ifaces[iface_name]
return '', ''
@staticmethod
def _build_route_list(proc_net_route):
"""
Construct a list of network route entries
:param list(str) proc_net_route: Route table lines, including headers, containing at least one route
:return: List of network route objects
:rtype: list(RouteEntry)
"""
idx = 0
column_index = {}
header_line = proc_net_route[0]
for header in filter(lambda h: len(h) > 0, header_line.split("\t")):
column_index[header.strip()] = idx
idx += 1
try:
idx_iface = column_index["Iface"]
idx_dest = column_index["Destination"]
idx_gw = column_index["Gateway"]
idx_flags = column_index["Flags"]
idx_metric = column_index["Metric"]
idx_mask = column_index["Mask"]
except KeyError:
msg = "/proc/net/route is missing key information; headers are [{0}]".format(header_line)
logger.error(msg)
return []
route_list = []
for entry in proc_net_route[1:]:
route = entry.split("\t")
if len(route) > 0:
route_obj = RouteEntry(route[idx_iface], route[idx_dest], route[idx_gw], route[idx_mask],
route[idx_flags], route[idx_metric])
route_list.append(route_obj)
return route_list
@staticmethod
def read_route_table():
"""
Return a list of strings comprising the route table, including column headers. Each line is stripped of leading
or trailing whitespace but is otherwise unmolested.
:return: Entries in the text route table
:rtype: list(str)
"""
try:
with open('/proc/net/route') as routing_table:
return list(map(str.strip, routing_table.readlines()))
except Exception as e:
logger.error("Cannot read route table [{0}]", ustr(e))
return []
@staticmethod
def get_list_of_routes(route_table):
"""
Construct a list of all network routes known to this system.
:param list(str) route_table: List of text entries from route table, including headers
:return: a list of network routes
:rtype: list(RouteEntry)
"""
route_list = []
count = len(route_table)
if count < 1:
logger.error("/proc/net/route is missing headers")
elif count == 1:
logger.error("/proc/net/route contains no routes")
else:
route_list = DefaultOSUtil._build_route_list(route_table)
return route_list
def get_primary_interface(self):
"""
Get the name of the primary interface, which is the one with the
default route attached to it; if there are multiple default routes,
the primary has the lowest Metric.
:return: the interface which has the default route
"""
# from linux/route.h
RTF_GATEWAY = 0x02
DEFAULT_DEST = "00000000"
primary_interface = None
if not self.disable_route_warning:
logger.info("Examine /proc/net/route for primary interface")
route_table = DefaultOSUtil.read_route_table()
def is_default(route):
return route.destination == DEFAULT_DEST and int(route.flags) & RTF_GATEWAY == RTF_GATEWAY
candidates = list(filter(is_default, DefaultOSUtil.get_list_of_routes(route_table)))
if len(candidates) > 0:
def get_metric(route):
return int(route.metric)
primary_route = min(candidates, key=get_metric)
primary_interface = primary_route.interface
if primary_interface is None:
primary_interface = ''
if not self.disable_route_warning:
with open('/proc/net/route') as routing_table_fh:
routing_table_text = routing_table_fh.read()
logger.warn('Could not determine primary interface, '
'please ensure /proc/net/route is correct')
logger.warn('Contents of /proc/net/route:\n{0}'.format(routing_table_text))
logger.warn('Primary interface examination will retry silently')
self.disable_route_warning = True
else:
logger.info('Primary interface is [{0}]'.format(primary_interface))
self.disable_route_warning = False
return primary_interface
def is_primary_interface(self, ifname):
"""
Indicate whether the specified interface is the primary.
:param ifname: the name of the interface - eth0, lo, etc.
:return: True if this interface binds the default route
"""
return self.get_primary_interface() == ifname
def is_loopback(self, ifname):
"""
Determine if a named interface is loopback.
"""
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
ifname_buff = ifname + ('\0' * 256)
result = fcntl.ioctl(s.fileno(), IOCTL_SIOCGIFFLAGS, ifname_buff)
flags, = struct.unpack('H', result[16:18])
isloopback = flags & 8 == 8
if not self.disable_route_warning:
logger.info('interface [{0}] has flags [{1}], '
'is loopback [{2}]'.format(ifname, flags, isloopback))
s.close()
return isloopback
def get_dhcp_lease_endpoint(self):
"""
OS specific, this should return the decoded endpoint of
the wireserver from option 245 in the dhcp leases file
if it exists on disk.
:return: The endpoint if available, or None
"""
return None
@staticmethod
def get_endpoint_from_leases_path(pathglob):
"""
Try to discover and decode the wireserver endpoint in the
specified dhcp leases path.
:param pathglob: The path containing dhcp lease files
:return: The endpoint if available, otherwise None
"""
endpoint = None
HEADER_LEASE = "lease"
HEADER_OPTION_245 = "option unknown-245"
HEADER_EXPIRE = "expire"
FOOTER_LEASE = "}"
FORMAT_DATETIME = "%Y/%m/%d %H:%M:%S"
option_245_re = re.compile(
r'\s*option\s+unknown-245\s+([0-9a-fA-F]+):([0-9a-fA-F]+):([0-9a-fA-F]+):([0-9a-fA-F]+);')
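# For example, a lease line such as
#   option unknown-245 a8:3f:81:10;
# matches the pattern above and decodes, one hex octet at a time, to the
# endpoint 168.63.129.16.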
logger.info("looking for leases in path [{0}]".format(pathglob))
for lease_file in glob.glob(pathglob):
leases = open(lease_file).read()
if HEADER_OPTION_245 in leases:
cached_endpoint = None
option_245_match = None
expired = True # assume expired
for line in leases.splitlines():
if line.startswith(HEADER_LEASE):
cached_endpoint = None
expired = True
elif HEADER_EXPIRE in line:
if "never" in line:
expired = False
else:
try:
expire_string = line.split(" ", 4)[-1].strip(";")
expire_date = datetime.datetime.strptime(expire_string, FORMAT_DATETIME)
if expire_date > datetime.datetime.utcnow():
expired = False
except: # pylint: disable=W0702
logger.error("could not parse expiry token '{0}'".format(line))
elif FOOTER_LEASE in line:
logger.info("dhcp entry:{0}, 245:{1}, expired:{2}".format(
cached_endpoint, option_245_match is not None, expired))
if not expired and cached_endpoint is not None:
endpoint = cached_endpoint
logger.info("found endpoint [{0}]".format(endpoint))
# we want to return the last valid entry, so
# keep searching
else:
option_245_match = option_245_re.match(line)
if option_245_match is not None:
cached_endpoint = '{0}.{1}.{2}.{3}'.format(
int(option_245_match.group(1), 16),
int(option_245_match.group(2), 16),
int(option_245_match.group(3), 16),
int(option_245_match.group(4), 16))
if endpoint is not None:
logger.info("cached endpoint found [{0}]".format(endpoint))
else:
logger.info("cached endpoint not found")
return endpoint
def is_missing_default_route(self):
try:
route_cmd = ["ip", "route", "show"]
routes = shellutil.run_command(route_cmd)
for route in routes.split("\n"):
if route.startswith("0.0.0.0 ") or route.startswith("default "):
return False
return True
except CommandError as e:
logger.warn("Cannot get the routing table. {0} failed: {1}", ustr(route_cmd), ustr(e))
return False
def get_if_name(self):
if_name = ''
if_found = False
while not if_found:
if_name = self.get_first_if()[0]
if_found = len(if_name) >= 2
if not if_found:
time.sleep(2)
return if_name
def get_ip4_addr(self):
return self.get_first_if()[1]
def set_route_for_dhcp_broadcast(self, ifname):
try:
route_cmd = ["ip", "route", "add", "255.255.255.255", "dev", ifname]
return shellutil.run_command(route_cmd)
except CommandError:
return ""
def remove_route_for_dhcp_broadcast(self, ifname):
try:
route_cmd = ["ip", "route", "del", "255.255.255.255", "dev", ifname]
shellutil.run_command(route_cmd)
except CommandError:
pass
def is_dhcp_available(self):
return True
def is_dhcp_enabled(self):
return False
def stop_dhcp_service(self):
pass
def start_dhcp_service(self):
pass
def start_network(self):
pass
def start_agent_service(self):
pass
def stop_agent_service(self):
pass
def register_agent_service(self):
pass
def unregister_agent_service(self):
pass
def restart_ssh_service(self):
pass
def route_add(self, net, mask, gateway): # pylint: disable=W0613
"""
Add specified route
"""
try:
cmd = ["ip", "route", "add", net, "via", gateway]
return shellutil.run_command(cmd)
except CommandError:
return ""
@staticmethod
def _text_to_pid_list(text):
return [int(n) for n in text.split()]
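# For example, _text_to_pid_list("123 456\n") returns [123, 456].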
@staticmethod
def _get_dhcp_pid(command, transform_command_output=None):
try:
output = shellutil.run_command(command)
if transform_command_output is not None:
output = transform_command_output(output)
return DefaultOSUtil._text_to_pid_list(output)
except CommandError as exception: # pylint: disable=W0612
return []
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "dhclient"])
def set_hostname(self, hostname):
fileutil.write_file('/etc/hostname', hostname)
self._run_command_without_raising(["hostname", hostname], log_error=False)
def set_dhcp_hostname(self, hostname):
autosend = r'^[^#]*?send\s*host-name.*?(|gethostname[(,)])'
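# For example, an uncommented dhclient.conf line such as
#   send host-name = gethostname();
# or
#   send host-name "myhost";
# already auto-sends the host name, so the file is left untouched below.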
dhclient_files = ['/etc/dhcp/dhclient.conf', '/etc/dhcp3/dhclient.conf', '/etc/dhclient.conf']
for conf_file in dhclient_files:
if not os.path.isfile(conf_file):
continue
if fileutil.findre_in_file(conf_file, autosend):
# Return if auto send host-name is configured
return
fileutil.update_conf_file(conf_file,
'send host-name',
'send host-name "{0}";'.format(hostname))
def restart_if(self, ifname, retries=3, wait=5):
retry_limit = retries + 1
for attempt in range(1, retry_limit):
return_code = shellutil.run("ifdown {0} && ifup {0}".format(ifname), expected_errors=[1] if attempt < retries else [])
if return_code == 0:
return
logger.warn("failed to restart {0}: return code {1}".format(ifname, return_code))
if attempt < retries:
logger.info("retrying in {0} seconds".format(wait))
time.sleep(wait)
else:
logger.warn("exceeded restart retries")
def check_and_recover_nic_state(self, ifname):
# TODO: This should be implemented for all distros where we reset the network during publishing hostname. Currently it is only implemented in RedhatOSUtil.
pass
def publish_hostname(self, hostname, recover_nic=False):
"""
Publishes the provided hostname.
"""
self.set_dhcp_hostname(hostname)
self.set_hostname_record(hostname)
ifname = self.get_if_name()
self.restart_if(ifname)
if recover_nic:
self.check_and_recover_nic_state(ifname)
def set_scsi_disks_timeout(self, timeout):
for dev in os.listdir("/sys/block"):
if dev.startswith('sd'):
self.set_block_device_timeout(dev, timeout)
def set_block_device_timeout(self, dev, timeout):
if dev is not None and timeout is not None:
file_path = "/sys/block/{0}/device/timeout".format(dev)
content = fileutil.read_file(file_path)
original = content.splitlines()[0].rstrip()
if original != timeout:
fileutil.write_file(file_path, timeout)
logger.info("Set block dev timeout: {0} with timeout: {1}",
dev, timeout)
def get_mount_point(self, mountlist, device):
"""
Example of mountlist:
/dev/sda1 on / type ext4 (rw)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
tmpfs on /dev/shm type tmpfs
(rw,rootcontext="system_u:object_r:tmpfs_t:s0")
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
/dev/sdb1 on /mnt/resource type ext4 (rw)
"""
if mountlist and device:
for entry in mountlist.split('\n'):
if re.search(device, entry):
tokens = entry.split()
# Return the 3rd column of this line
return tokens[2] if len(tokens) > 2 else None
return None
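# Worked example, using the sample mountlist in the docstring above:
#   get_mount_point(mountlist, "/dev/sdb")   # -> "/mnt/resource"
# re.search matches the "/dev/sdb1 on /mnt/resource type ext4 (rw)" entry,
# and the third whitespace-separated token of that entry is the mount point.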
@staticmethod
def _enumerate_device_id():
"""
Enumerate all storage device IDs.
Args:
None
Returns:
Iterator[Tuple[str, str]]: VmBus and storage devices.
"""
if os.path.exists(STORAGE_DEVICE_PATH):
for vmbus in os.listdir(STORAGE_DEVICE_PATH):
deviceid = fileutil.read_file(os.path.join(STORAGE_DEVICE_PATH, vmbus, "device_id"))
guid = deviceid.strip('{}\n')
yield vmbus, guid
@staticmethod
def search_for_resource_disk(gen1_device_prefix, gen2_device_id):
"""
Search the filesystem for a device by ID or prefix.
Args:
gen1_device_prefix (str): Gen1 resource disk prefix.
gen2_device_id (str): Gen2 resource device ID.
Returns:
str: The found device.
"""
device = None
# We have to try device IDs for both Gen1 and Gen2 VMs.
logger.info('Searching gen1 prefix {0} or gen2 {1}'.format(gen1_device_prefix, gen2_device_id))
try:
for vmbus, guid in DefaultOSUtil._enumerate_device_id():
if guid.startswith(gen1_device_prefix) or guid == gen2_device_id:
for root, dirs, files in os.walk(STORAGE_DEVICE_PATH + vmbus): # pylint: disable=W0612
root_path_parts = root.split('/')
# For Gen1 VMs we only have to check for the block dir in the
# current device. But for Gen2 VMs all of the disks (sda, sdb,
# sr0) are presented in this device on the same SCSI controller.
# Because of that we need to also read the LUN. It will be:
# 0 - OS disk
# 1 - Resource disk
# 2 - CDROM
if root_path_parts[-1] == 'block' and (
guid != gen2_device_id or
root_path_parts[-2].split(':')[-1] == '1'):
device = dirs[0]
return device
else:
# older distros
for d in dirs:
if ':' in d and "block" == d.split(':')[0]:
device = d.split(':')[1]
return device
except (OSError, IOError) as exc:
logger.warn('Error getting device for {0} or {1}: {2}', gen1_device_prefix, gen2_device_id, ustr(exc))
return None
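# Illustrative walk-through with a hypothetical sysfs layout: when a matching
# GUID is found, a Gen1 VM exposes a path such as
# <STORAGE_DEVICE_PATH><vmbus>/.../block/sdb, so dirs[0] yields "sdb"; on
# older distros the same information appears as a "block:sdb" directory
# entry instead, so the device name is taken from the text after the ':'.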
def device_for_ide_port(self, port_id):
"""
Return device name attached to ide port 'n'.
"""
if port_id > 3:
return None
g0 = "00000000"
if port_id > 1:
g0 = "00000001"
port_id = port_id - 2
gen1_device_prefix = '{0}-000{1}'.format(g0, port_id)
device = DefaultOSUtil.search_for_resource_disk(
gen1_device_prefix=gen1_device_prefix,
gen2_device_id=GEN2_DEVICE_ID
)
logger.info('Found device: {0}'.format(device))
return device
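# Worked example of the prefix computation above: port_id 1 keeps g0 at
# "00000000", producing gen1_device_prefix "00000000-0001"; port_id 2
# switches g0 to "00000001" and is rebased to 0, producing "00000001-0000".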
def set_hostname_record(self, hostname):
fileutil.write_file(conf.get_published_hostname(), contents=hostname)
def get_hostname_record(self):
hostname_record = conf.get_published_hostname()
if not os.path.exists(hostname_record):
# older agents (but newer or equal to 2.2.3) create published_hostname during provisioning; when provisioning is done
# by cloud-init the hostname is written to set-hostname
hostname = self._get_cloud_init_hostname()
if hostname is None:
logger.info("Retrieving hostname using socket.gethostname()")
hostname = socket.gethostname()
logger.info('Published hostname record does not exist, creating [{0}] with hostname [{1}]', hostname_record, hostname)
self.set_hostname_record(hostname)
record = fileutil.read_file(hostname_record)
return record
@staticmethod
def _get_cloud_init_hostname():
"""
Retrieves the hostname set by cloud-init; returns None if cloud-init did not set the hostname or if there is an
error retrieving it.
"""
hostname_file = '/var/lib/cloud/data/set-hostname'
try:
if os.path.exists(hostname_file):
#
# The format is similar to
#
# $ cat /var/lib/cloud/data/set-hostname
# {
# "fqdn": "nam-u18",
# "hostname": "nam-u18"
# }
#
logger.info("Retrieving hostname from {0}", hostname_file)
with open(hostname_file, 'r') as file_:
hostname_info = json.load(file_)
if "hostname" in hostname_info:
return hostname_info["hostname"]
except Exception as exception:
logger.warn("Error retrieving hostname: {0}", ustr(exception))
return None
def del_account(self, username):
if self.is_sys_user(username):
logger.error("{0} is a system user. Will not delete it.", username)
return
self._run_command_without_raising(["touch", "/var/run/utmp"])
self._run_command_without_raising(['userdel', '-f', '-r', username])
self.conf_sudoer(username, remove=True)
def decode_customdata(self, data):
return base64.b64decode(data).decode('utf-8')
def get_total_mem(self):
# Get total memory in bytes and divide by 1024**2 to get the value in MB.
return os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES') / (1024 ** 2)
def get_processor_cores(self):
return multiprocessing.cpu_count()
def check_pid_alive(self, pid):
try:
pid = int(pid)
os.kill(pid, 0)
except (ValueError, TypeError):
return False
except OSError as os_error:
if os_error.errno == errno.EPERM:
return True
return False
return True
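# Note: os.kill(pid, 0) sends no signal; it only performs the existence and
# permission checks. EPERM therefore means the process exists but is owned
# by another user, hence the True above. For example:
#   check_pid_alive(os.getpid())   # -> True
#   check_pid_alive("not-a-pid")   # -> False (ValueError is swallowed)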
@property
def is_64bit(self):
return sys.maxsize > 2 ** 32
@staticmethod
def _get_proc_stat():
"""
Get the contents of /proc/stat.
# cpu 813599 3940 909253 154538746 874851 0 6589 0 0 0
# cpu0 401094 1516 453006 77276738 452939 0 3312 0 0 0
# cpu1 412505 2423 456246 77262007 421912 0 3276 0 0 0
:return: A single string with the contents of /proc/stat
:rtype: str
"""
results = None
try:
results = fileutil.read_file('/proc/stat')
except (OSError, IOError) as ex:
logger.warn("Couldn't read /proc/stat: {0}".format(ex.strerror))
raise
return results
@staticmethod
def get_total_cpu_ticks_since_boot():
"""
Compute the number of USER_HZ units of time that have elapsed in all categories, across all cores, since boot.
:return: int
"""
system_cpu = 0
proc_stat = DefaultOSUtil._get_proc_stat()
if proc_stat is not None:
for line in proc_stat.splitlines():
if ALL_CPUS_REGEX.match(line):
system_cpu = sum(
int(i) for i in line.split()[1:8]) # see "man proc" for a description of these fields
break
return system_cpu
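# Worked example, using the sample "cpu" line shown in _get_proc_stat:
#   813599 + 3940 + 909253 + 154538746 + 874851 + 0 + 6589 == 157146978
# i.e. fields 1..7 (user, nice, system, idle, iowait, irq, softirq) are
# summed and the remaining fields (steal, guest, ...) are excluded.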
@staticmethod
def get_used_and_available_system_memory():
"""
Get the contents of free -b in bytes.
# free -b
# total used free shared buff/cache available
# Mem: 8340144128 619352064 5236809728 1499136 2483982336 7426314240
# Swap: 0 0 0
:return: used and available memory in megabytes
"""
used_mem = available_mem = 0
free_cmd = ["free", "-b"]
memory = shellutil.run_command(free_cmd)
for line in memory.split("\n"):
if ALL_MEMS_REGEX.match(line):
mems = line.split()
used_mem = int(mems[2])
available_mem = int(mems[6]) # see "man free" for a description of these fields
return used_mem/(1024 ** 2), available_mem/(1024 ** 2)
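# Worked example, using the sample "free -b" output in the docstring above:
#   used      = 619352064 / (1024 ** 2)    # ~590.7 MB
#   available = 7426314240 / (1024 ** 2)   # ~7082.3 MB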
def get_nic_state(self, as_string=False):
"""
Capture NIC state (IPv4 and IPv6 addresses plus link state).
:return: By default returns a dictionary of NIC state objects, with the NIC name as key. If as_string is True
returns the state as a string
:rtype: dict(str,NetworkInformationCard)
"""
state = {}
all_command = ["ip", "-a", "-o", "link"]
inet_command = ["ip", "-4", "-a", "-o", "address"]
inet6_command = ["ip", "-6", "-a", "-o", "address"]
try:
all_output = shellutil.run_command(all_command)
except shellutil.CommandError as command_error:
logger.verbose("Could not fetch NIC link info: {0}", ustr(command_error))
return "" if as_string else {}
if as_string:
def run_command(command):
try:
return shellutil.run_command(command)
except shellutil.CommandError as command_error:
return str(command_error)
inet_output = run_command(inet_command)
inet6_output = run_command(inet6_command)
return "Executing {0}:\n{1}\nExecuting {2}:\n{3}\nExecuting {4}:\n{5}\n".format(all_command, all_output, inet_command, inet_output, inet6_command, inet6_output)
else:
self._update_nic_state_all(state, all_output)
self._update_nic_state(state, inet_command, NetworkInterfaceCard.add_ipv4, "an IPv4 address")
self._update_nic_state(state, inet6_command, NetworkInterfaceCard.add_ipv6, "an IPv6 address")
return state
@staticmethod
def _update_nic_state_all(state, command_output):
for entry in command_output.splitlines():
# Sample output:
# 1: lo: mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000\ link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 promiscuity 0 addrgenmode eui64
# 2: eth0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000\ link/ether 00:0d:3a:30:c3:5a brd ff:ff:ff:ff:ff:ff promiscuity 0 addrgenmode eui64
# 3: docker0: mtu 1500 qdisc noqueue state DOWN mode DEFAULT group default \ link/ether 02:42:b5:d5:00:1d brd ff:ff:ff:ff:ff:ff promiscuity 0 \ bridge forward_delay 1500 hello_time 200 max_age 2000 ageing_time 30000 stp_state 0 priority 32768 vlan_filtering 0 vlan_protocol 802.1Q addrgenmode eui64
result = IP_COMMAND_OUTPUT.match(entry)
if result:
name = result.group(1)
state[name] = NetworkInterfaceCard(name, result.group(2))
@staticmethod
def _update_nic_state(state, ip_command, handler, description):
"""
Update the state of NICs based on the output of a specified ip subcommand.
:param dict(str, NetworkInterfaceCard) state: Dictionary of NIC state objects
:param str ip_command: The ip command to run
:param handler: A method on the NetworkInterfaceCard class
:param str description: Description of the particular information being added to the state
"""
try:
output = shellutil.run_command(ip_command)
for entry in output.splitlines():
# family inet sample output:
# 1: lo inet 127.0.0.1/8 scope host lo\ valid_lft forever preferred_lft forever
# 2: eth0 inet 10.145.187.220/26 brd 10.145.187.255 scope global eth0\ valid_lft forever preferred_lft forever
# 3: docker0 inet 192.168.43.1/24 brd 192.168.43.255 scope global docker0\ valid_lft forever preferred_lft forever
#
# family inet6 sample output:
# 1: lo inet6 ::1/128 scope host \ valid_lft forever preferred_lft forever
# 2: eth0 inet6 fe80::20d:3aff:fe30:c35a/64 scope link \ valid_lft forever preferred_lft forever
result = IP_COMMAND_OUTPUT.match(entry)
if result:
interface_name = result.group(1)
if interface_name in state:
handler(state[interface_name], result.group(2))
else:
logger.error("Interface {0} has {1} but no link state".format(interface_name, description))
except shellutil.CommandError as command_error:
logger.error("[{0}] failed: {1}", ' '.join(ip_command), str(command_error))
@staticmethod
def _run_command_without_raising(cmd, log_error=True):
try:
shellutil.run_command(cmd, log_error=log_error)
# Original implementation of run() does a blanket catch, so mimicking the behaviour here
except Exception:
pass
@staticmethod
def _run_multiple_commands_without_raising(commands, log_error=True, continue_on_error=False):
for cmd in commands:
try:
shellutil.run_command(cmd, log_error=log_error)
# Original implementation of run() does a blanket catch, so mimicking the behaviour here
except Exception:
if continue_on_error:
continue
break
@staticmethod
def _run_command_raising_OSUtilError(cmd, err_msg, cmd_input=None):
# This method runs shell command using the new secure shellutil.run_command and raises OSUtilErrors on failures.
try:
return shellutil.run_command(cmd, log_error=True, input=cmd_input)
except shellutil.CommandError as e:
raise OSUtilError(
"{0}, Retcode: {1}, Output: {2}, Error: {3}".format(err_msg, e.returncode, e.stdout, e.stderr))
except Exception as e:
raise OSUtilError("{0}, Retcode: {1}, Error: {2}".format(err_msg, -1, ustr(e)))
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/devuan.py 0000664 0000000 0000000 00000003453 14626177470 0025425 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class DevuanOSUtil(DefaultOSUtil):
def __init__(self):
super(DevuanOSUtil, self).__init__()
self.jit_enabled = True
def restart_ssh_service(self):
logger.info("DevuanOSUtil::restart_ssh_service - trying to restart sshd")
return shellutil.run("/usr/sbin/service restart ssh", chk_err=False)
def stop_agent_service(self):
logger.info("DevuanOSUtil::stop_agent_service - trying to stop waagent")
return shellutil.run("/usr/sbin/service walinuxagent stop", chk_err=False)
def start_agent_service(self):
logger.info("DevuanOSUtil::start_agent_service - trying to start waagent")
return shellutil.run("/usr/sbin/service walinuxagent start", chk_err=False)
def start_network(self):
pass
def remove_rules_files(self, rules_files=""):
pass
def restore_rules_files(self, rules_files=""):
pass
def get_dhcp_lease_endpoint(self):
return self.get_endpoint_from_leases_path('/var/lib/dhcp/dhclient.*.leases')
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/factory.py 0000664 0000000 0000000 00000013002 14626177470 0025601 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_CODE_NAME, DISTRO_VERSION, DISTRO_FULL_NAME
from .alpine import AlpineOSUtil
from .arch import ArchUtil
from .bigip import BigIpOSUtil
from .clearlinux import ClearLinuxUtil
from .coreos import CoreOSUtil
from .debian import DebianOSBaseUtil, DebianOSModernUtil
from .default import DefaultOSUtil
from .devuan import DevuanOSUtil
from .freebsd import FreeBSDOSUtil
from .gaia import GaiaOSUtil
from .iosxe import IosxeOSUtil
from .mariner import MarinerOSUtil
from .nsbsd import NSBSDOSUtil
from .openbsd import OpenBSDOSUtil
from .openwrt import OpenWRTOSUtil
from .redhat import RedhatOSUtil, Redhat6xOSUtil, RedhatOSModernUtil
from .suse import SUSEOSUtil, SUSE11OSUtil
from .photonos import PhotonOSUtil
from .ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, \
UbuntuSnappyOSUtil, Ubuntu16OSUtil, Ubuntu18OSUtil
from .fedora import FedoraOSUtil
def get_osutil(distro_name=DISTRO_NAME,
distro_code_name=DISTRO_CODE_NAME,
distro_version=DISTRO_VERSION,
distro_full_name=DISTRO_FULL_NAME):
# We are adding another layer of abstraction here since we want to be able to mock the final result of the
# function call. Since the get_osutil function is imported in various places in our tests, we can't mock
# it globally. Instead, we add _get_osutil function and mock it in the test base class, AgentTestCase.
return _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name)
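# Usage sketch (assuming the remaining parameters keep their defaults and do
# not match an earlier branch, e.g. the "Clear Linux" full-name check):
#   get_osutil(distro_name="ubuntu", distro_version="18.04")   # -> Ubuntu18OSUtil
#   get_osutil(distro_name="no-such-distro")                   # -> DefaultOSUtil (with a warning)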
def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name):
if distro_name == "photonos":
return PhotonOSUtil()
if distro_name == "arch":
return ArchUtil()
if "Clear Linux" in distro_full_name:
return ClearLinuxUtil()
if distro_name == "ubuntu":
ubuntu_version = Version(distro_version)
if ubuntu_version in [Version("12.04"), Version("12.10")]:
return Ubuntu12OSUtil()
if ubuntu_version in [Version("14.04"), Version("14.10")]:
return Ubuntu14OSUtil()
if ubuntu_version in [Version('16.04'), Version('16.10'), Version('17.04')]:
return Ubuntu16OSUtil()
if Version('18.04') <= ubuntu_version <= Version('24.04'):
return Ubuntu18OSUtil()
if distro_full_name == "Snappy Ubuntu Core":
return UbuntuSnappyOSUtil()
return UbuntuOSUtil()
if distro_name == "alpine":
return AlpineOSUtil()
if distro_name == "kali":
return DebianOSBaseUtil()
if distro_name in ("flatcar", "coreos") or distro_code_name in ("flatcar", "coreos"):
return CoreOSUtil()
if distro_name in ("suse", "sle_hpc", "sles", "opensuse"):
if distro_full_name == 'SUSE Linux Enterprise Server' \
and Version(distro_version) < Version('12') \
or distro_full_name == 'openSUSE' and Version(distro_version) < Version('13.2'):
return SUSE11OSUtil()
return SUSEOSUtil()
if distro_name == "debian":
if "sid" in distro_version or Version(distro_version) > Version("7"):
return DebianOSModernUtil()
return DebianOSBaseUtil()
# Devuan support only works with v4+.
# Devuan v4 (Chimaera) uses Python 3.9, in which the platform.linux_distribution
# function has been removed; that function was unable to distinguish between
# Debian and Devuan. The distro.linux_distribution function used in its place
# can distinguish between the two.
if distro_name == "devuan" and Version(distro_version) >= Version("4"):
return DevuanOSUtil()
if distro_name in ("redhat", "rhel", "centos", "oracle", "almalinux",
"cloudlinux", "rocky"):
if Version(distro_version) < Version("7"):
return Redhat6xOSUtil()
if Version(distro_version) >= Version("8.6"):
return RedhatOSModernUtil()
return RedhatOSUtil()
if distro_name == "euleros":
return RedhatOSUtil()
if distro_name == "uos":
return RedhatOSUtil()
if distro_name == "freebsd":
return FreeBSDOSUtil()
if distro_name == "openbsd":
return OpenBSDOSUtil()
if distro_name == "bigip":
return BigIpOSUtil()
if distro_name == "gaia":
return GaiaOSUtil()
if distro_name == "iosxe":
return IosxeOSUtil()
if distro_name == "mariner":
return MarinerOSUtil()
if distro_name == "nsbsd":
return NSBSDOSUtil()
if distro_name == "openwrt":
return OpenWRTOSUtil()
if distro_name == "fedora":
return FedoraOSUtil()
logger.warn("Unable to load distro implementation for {0}. Using default distro implementation instead.", distro_name)
return DefaultOSUtil()
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/fedora.py 0000664 0000000 0000000 00000004522 14626177470 0025401 0 ustar 00root root 0000000 0000000 #
# Copyright 2022 Red Hat Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import time
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class FedoraOSUtil(DefaultOSUtil):
def __init__(self):
super(FedoraOSUtil, self).__init__()
self.agent_conf_file_path = '/etc/waagent.conf'
@staticmethod
def get_systemd_unit_file_install_path():
return '/usr/lib/systemd/system'
@staticmethod
def get_agent_bin_path():
return '/usr/sbin'
def is_dhcp_enabled(self):
return True
def start_network(self):
pass
def restart_if(self, ifname=None, retries=3, wait=5):
retry_limit = retries + 1
for attempt in range(1, retry_limit):
return_code = shellutil.run("ip link set {0} down && ip link set {0} up".format(ifname))
if return_code == 0:
return
logger.warn("failed to restart {0}: return code {1}".format(ifname, return_code))
if attempt < retries:
logger.info("retrying in {0} seconds".format(wait))
time.sleep(wait)
else:
logger.warn("exceeded restart retries")
def restart_ssh_service(self):
shellutil.run('systemctl restart sshd')
def stop_dhcp_service(self):
pass
def start_dhcp_service(self):
pass
def start_agent_service(self):
return shellutil.run('systemctl start waagent', chk_err=False)
def stop_agent_service(self):
return shellutil.run('systemctl stop waagent', chk_err=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "dhclient"])
def conf_sshd(self, disable_password):
pass
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/freebsd.py 0000664 0000000 0000000 00000060655 14626177470 0025564 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import socket
import struct
import binascii
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.utils.textutil as textutil
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.exception import OSUtilError
from azurelinuxagent.common.osutil.default import DefaultOSUtil
from azurelinuxagent.common.future import ustr
class FreeBSDOSUtil(DefaultOSUtil):
def __init__(self):
super(FreeBSDOSUtil, self).__init__()
self._scsi_disks_timeout_set = False
self.jit_enabled = True
@staticmethod
def get_agent_bin_path():
return "/usr/local/sbin"
def set_hostname(self, hostname):
rc_file_path = '/etc/rc.conf'
conf_file = fileutil.read_file(rc_file_path).split("\n")
textutil.set_ini_config(conf_file, "hostname", hostname)
fileutil.write_file(rc_file_path, "\n".join(conf_file))
self._run_command_without_raising(["hostname", hostname], log_error=False)
def restart_ssh_service(self):
return shellutil.run('service sshd restart', chk_err=False)
def useradd(self, username, expiration=None, comment=None):
"""
Create user account with 'username'
"""
userentry = self.get_userentry(username)
if userentry is not None:
logger.warn("User {0} already exists, skip useradd", username)
return
if expiration is not None:
cmd = ["pw", "useradd", username, "-e", expiration, "-m"]
else:
cmd = ["pw", "useradd", username, "-m"]
if comment is not None:
cmd.extend(["-c", comment])
self._run_command_raising_OSUtilError(cmd, err_msg="Failed to create user account:{0}".format(username))
def del_account(self, username):
if self.is_sys_user(username):
logger.error("{0} is a system user. Will not delete it.", username)
return
self._run_command_without_raising(['touch', '/var/run/utx.active'])
self._run_command_without_raising(['rmuser', '-y', username])
self.conf_sudoer(username, remove=True)
def chpasswd(self, username, password, crypt_id=6, salt_len=10):
if self.is_sys_user(username):
raise OSUtilError(("User {0} is a system user, "
"will not set password.").format(username))
passwd_hash = textutil.gen_password_hash(password, crypt_id, salt_len)
self._run_command_raising_OSUtilError(['pw', 'usermod', username, '-H', '0'], cmd_input=passwd_hash,
err_msg="Failed to set password for {0}".format(username))
def del_root_password(self):
err = shellutil.run('pw usermod root -h -')
if err:
raise OSUtilError("Failed to delete root password: Failed to update password database.")
def get_if_mac(self, ifname):
data = self._get_net_info()
if data[0] == ifname:
return data[2].replace(':', '').upper()
return None
def get_first_if(self):
return self._get_net_info()[:2]
@staticmethod
def read_route_table():
"""
Return a list of strings comprising the route table, in the Linux /proc/net/route format. The input is taken from FreeBSD's
`netstat -rn -f inet` command. Here is what the function does in detail:
1. Runs `netstat -rn -f inet` which outputs a column formatted list of ipv4 routes in priority order like so:
> Routing tables
>
> Internet:
> Destination Gateway Flags Refs Use Netif Expire
> default 61.221.xx.yy UGS 0 247 em1
> 10 10.10.110.5 UGS 0 50 em0
> 10.10.110/26 link#1 UC 0 0 em0
> 10.10.110.5 00:1b:0d:e6:58:40 UHLW 2 0 em0 1145
> 61.221.xx.yy/29 link#2 UC 0 0 em1
> 61.221.xx.yy 00:1b:0d:e6:57:c0 UHLW 2 0 em1 1055
> 61.221.xx/24 link#2 UC 0 0 em1
> 127.0.0.1 127.0.0.1 UH 0 0 lo0
2. Convert it to an array of lines that resemble an equivalent /proc/net/route content on a Linux system like so:
> Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT
> gre828 00000000 00000000 0001 0 0 0 000000F8 0 0 0
> ens160 00000000 FE04700A 0003 0 0 100 00000000 0 0 0
> gre828 00000008 00000000 0001 0 0 0 000000FE 0 0 0
> ens160 0004700A 00000000 0001 0 0 100 00FFFFFF 0 0 0
> gre828 2504700A 00000000 0005 0 0 0 FFFFFFFF 0 0 0
> gre828 3704700A 00000000 0005 0 0 0 FFFFFFFF 0 0 0
> gre828 4104700A 00000000 0005 0 0 0 FFFFFFFF 0 0 0
:return: Entries in the ipv4 route priority list from `netstat -rn -f inet` in the linux `/proc/net/route` style
:rtype: list(str)
"""
def _get_netstat_rn_ipv4_routes():
"""
Runs `netstat -rn -f inet` and parses its output and returns a list of routes where the key is the column name
and the value is the value in the column, stripped of leading and trailing whitespace.
:return: List of dictionaries representing routes in the ipv4 route priority list from `netstat -rn -f inet`
:rtype: list(dict)
"""
cmd = ["netstat", "-rn", "-f", "inet"]
output = shellutil.run_command(cmd, log_error=True)
output_lines = output.split("\n")
if len(output_lines) < 3:
raise OSUtilError("`netstat -rn -f inet` output seems to be empty")
output_lines = [line.strip() for line in output_lines if line]
if "Internet:" not in output_lines:
raise OSUtilError("`netstat -rn -f inet` output seems to contain no ipv4 routes")
route_header_line = output_lines.index("Internet:") + 1
# Parse the file structure and left justify the routes
route_start_line = route_header_line + 1
route_line_length = max([len(line) for line in output_lines[route_header_line:]])
netstat_route_list = [line.ljust(route_line_length) for line in output_lines[route_start_line:]]
# Parse the headers
_route_headers = output_lines[route_header_line].split()
n_route_headers = len(_route_headers)
route_columns = {}
for i in range(0, n_route_headers - 1):
route_columns[_route_headers[i]] = (
output_lines[route_header_line].index(_route_headers[i]),
(output_lines[route_header_line].index(_route_headers[i + 1]) - 1)
)
route_columns[_route_headers[n_route_headers - 1]] = (
output_lines[route_header_line].index(_route_headers[n_route_headers - 1]),
None
)
# Parse the routes
netstat_routes = []
n_netstat_routes = len(netstat_route_list)
for i in range(0, n_netstat_routes):
netstat_route = {}
for column in route_columns:
netstat_route[column] = netstat_route_list[i][
route_columns[column][0]:route_columns[column][1]].strip()
netstat_route["Metric"] = n_netstat_routes - i
netstat_routes.append(netstat_route)
# Return the parsed routes
return netstat_routes
def _ipv4_ascii_address_to_hex(ipv4_ascii_address):
"""
Converts an IPv4 32bit address from its ASCII notation (ie. 127.0.0.1) to an 8 digit padded hex notation
(ie. "0100007F") string.
:return: 8 character long hex string representation of the IP
:rtype: string
"""
# Raises socket.error if the IP is not a valid IPv4
return "%08X" % int(binascii.hexlify(
struct.pack("!I", struct.unpack("=I", socket.inet_pton(socket.AF_INET, ipv4_ascii_address))[0])), 16)
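# Worked example: "127.0.0.1" packs to the bytes 7f 00 00 01; the
# unpack("=I")/pack("!I") round trip reorders them into the host byte order
# used by /proc/net/route, so on a little-endian host:
#   _ipv4_ascii_address_to_hex("127.0.0.1")   # -> "0100007F"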
def _ipv4_cidr_mask_to_hex(ipv4_cidr_mask):
"""
Converts a subnet mask from its CIDR integer notation (ie. 32) to an 8 digit padded hex notation
(ie. "FFFFFFFF") string representing its bitmask form.
:return: 8 character long hex string representation of the bitmask
:rtype: string
"""
return "{0:08x}".format(
struct.unpack("=I", struct.pack("!I", (0xffffffff << (32 - ipv4_cidr_mask)) & 0xffffffff))[0]).upper()
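# Worked examples (on a little-endian host, matching /proc/net/route):
#   _ipv4_cidr_mask_to_hex(32)   # -> "FFFFFFFF"
#   _ipv4_cidr_mask_to_hex(24)   # -> "00FFFFFF"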
def _ipv4_cidr_destination_to_hex(destination):
"""
Converts a destination address from its CIDR notation (ie. 127.0.0.1/32 or default or localhost) to an 8
digit padded hex notation (ie. "0100007F" or "00000000" or "0100007F") string and its subnet bitmask
also in hex (FFFFFFFF).
:return: tuple of 8 character long hex string representation of the IP and 8 character long hex string representation of the subnet mask
:rtype: tuple(string, string)
"""
destination_ip = "0.0.0.0"
destination_subnetmask = 32
if destination != "default":
if destination == "localhost":
destination_ip = "127.0.0.1"
else:
destination_ip = destination.split("/")
if len(destination_ip) > 1:
destination_subnetmask = int(destination_ip[1])
destination_ip = destination_ip[0]
hex_destination_ip = _ipv4_ascii_address_to_hex(destination_ip)
hex_destination_subnetmask = _ipv4_cidr_mask_to_hex(destination_subnetmask)
return hex_destination_ip, hex_destination_subnetmask
def _try_ipv4_gateway_to_hex(gateway):
"""
If the gateway is an IPv4 address, return its IP in hex, else, return "00000000"
:return: 8 character long hex string representation of the IP of the gateway
:rtype: string
"""
try:
return _ipv4_ascii_address_to_hex(gateway)
except socket.error:
return "00000000"
def _ascii_route_flags_to_bitmask(ascii_route_flags):
"""
Converts route flags to a bitmask of their equivalent linux/route.h values.
:return: integer representation of a 16 bit mask
:rtype: int
"""
bitmask_flags = 0
RTF_UP = 0x0001
RTF_GATEWAY = 0x0002
RTF_HOST = 0x0004
RTF_DYNAMIC = 0x0010
if "U" in ascii_route_flags:
bitmask_flags |= RTF_UP
if "G" in ascii_route_flags:
bitmask_flags |= RTF_GATEWAY
if "H" in ascii_route_flags:
bitmask_flags |= RTF_HOST
if "S" not in ascii_route_flags:
bitmask_flags |= RTF_DYNAMIC
return bitmask_flags
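# Worked example: a typical default route carries the flags "UGS", which
# map to RTF_UP | RTF_GATEWAY == 0x0003; the static "S" flag suppresses
# RTF_DYNAMIC, so only routes without it get that bit:
#   _ascii_route_flags_to_bitmask("UGS")   # -> 3
#   _ascii_route_flags_to_bitmask("UH")    # -> 21 (0x0001 | 0x0004 | 0x0010)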
def _freebsd_netstat_rn_route_to_linux_proc_net_route(netstat_route):
"""
Converts a single FreeBSD `netstat -rn -f inet` route to its equivalent /proc/net/route line. ie:
> default 0.0.0.0 UGS 0 247 em1
to
> em1 00000000 00000000 0003 0 0 0 FFFFFFFF 0 0 0
:return: string representation of the equivalent /proc/net/route line
:rtype: string
"""
network_interface = netstat_route["Netif"]
hex_destination_ip, hex_destination_subnetmask = _ipv4_cidr_destination_to_hex(netstat_route["Destination"])
hex_gateway = _try_ipv4_gateway_to_hex(netstat_route["Gateway"])
bitmask_flags = _ascii_route_flags_to_bitmask(netstat_route["Flags"])
dummy_refcount = 0
dummy_use = 0
route_metric = netstat_route["Metric"]
dummy_mtu = 0
dummy_window = 0
dummy_irtt = 0
return "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}".format(
network_interface,
hex_destination_ip,
hex_gateway,
bitmask_flags,
dummy_refcount,
dummy_use,
route_metric,
hex_destination_subnetmask,
dummy_mtu,
dummy_window,
dummy_irtt
)
linux_style_route_file = ["Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT"]
try:
netstat_routes = _get_netstat_rn_ipv4_routes()
# Make sure the `netstat -rn -f inet` contains columns for Netif, Destination, Gateway and Flags which are needed to convert
# to the Linux Format
if len(netstat_routes) > 0:
missing_headers = []
if "Netif" not in netstat_routes[0]:
missing_headers.append("Netif")
if "Destination" not in netstat_routes[0]:
missing_headers.append("Destination")
if "Gateway" not in netstat_routes[0]:
missing_headers.append("Gateway")
if "Flags" not in netstat_routes[0]:
missing_headers.append("Flags")
if missing_headers:
raise KeyError(
"`netstat -rn -f inet` output is missing columns required to convert to the Linux /proc/net/route format; columns are [{0}]".format(
missing_headers))
# Parse the Netstat IPv4 Routes
for netstat_route in netstat_routes:
try:
linux_style_route = _freebsd_netstat_rn_route_to_linux_proc_net_route(netstat_route)
linux_style_route_file.append(linux_style_route)
except Exception:
# Skip the route
continue
except Exception as e:
logger.error("Cannot read route table [{0}]", ustr(e))
return linux_style_route_file
@staticmethod
def get_list_of_routes(route_table):
"""
Construct a list of all network routes known to this system.
:param list(str) route_table: List of text entries from route table, including headers
:return: a list of network routes
:rtype: list(RouteEntry)
"""
route_list = []
count = len(route_table)
if count < 1:
logger.error("netstat -rn -f inet is missing headers")
elif count == 1:
logger.error("netstat -rn -f inet contains no routes")
else:
route_list = DefaultOSUtil._build_route_list(route_table)
return route_list
def get_primary_interface(self):
"""
Get the name of the primary interface, which is the one with the
default route attached to it; if there are multiple default routes,
the primary has the lowest Metric.
:return: the interface which has the default route
"""
RTF_GATEWAY = 0x0002
DEFAULT_DEST = "00000000"
primary_interface = None
if not self.disable_route_warning:
logger.info("Examine `netstat -rn -f inet` for primary interface")
route_table = self.read_route_table()
def is_default(route):
return (route.destination == DEFAULT_DEST) and (RTF_GATEWAY & route.flags)
candidates = list(filter(is_default, self.get_list_of_routes(route_table)))
if len(candidates) > 0:
def get_metric(route):
return int(route.metric)
primary_route = min(candidates, key=get_metric)
primary_interface = primary_route.interface
if primary_interface is None:
primary_interface = ''
if not self.disable_route_warning:
logger.warn('Could not determine primary interface, '
'please ensure routes are correct')
logger.warn('Primary interface examination will retry silently')
self.disable_route_warning = True
else:
logger.info('Primary interface is [{0}]'.format(primary_interface))
self.disable_route_warning = False
return primary_interface
def is_primary_interface(self, ifname):
"""
Indicate whether the specified interface is the primary.
:param ifname: the name of the interface - eth0, lo, etc.
:return: True if this interface binds the default route
"""
return self.get_primary_interface() == ifname
def is_loopback(self, ifname):
"""
Determine if a named interface is loopback.
"""
return ifname.startswith("lo")
def route_add(self, net, mask, gateway):
cmd = 'route add {0} {1} {2}'.format(net, gateway, mask)
return shellutil.run(cmd, chk_err=False)
def is_missing_default_route(self):
"""
For FreeBSD, the default broadcast goes to the current default gateway, not an all-ones broadcast address; the
route must be specified manually to get it to work in a VNET environment.
SEE ALSO: man ip(4) IP_ONESBCAST
"""
RTF_GATEWAY = 0x0002
DEFAULT_DEST = "00000000"
route_table = self.read_route_table()
routes = self.get_list_of_routes(route_table)
for route in routes:
if (route.destination == DEFAULT_DEST) and (RTF_GATEWAY & route.flags):
return False
return True
def is_dhcp_enabled(self):
return True
def start_dhcp_service(self):
shellutil.run("/etc/rc.d/dhclient start {0}".format(self.get_if_name()), chk_err=False)
def allow_dhcp_broadcast(self):
pass
def set_route_for_dhcp_broadcast(self, ifname):
return shellutil.run("route add 255.255.255.255 -iface {0}".format(ifname), chk_err=False)
def remove_route_for_dhcp_broadcast(self, ifname):
shellutil.run("route delete 255.255.255.255 -iface {0}".format(ifname), chk_err=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pgrep", "-n", "dhclient"])
def eject_dvd(self, chk_err=True):
dvd = self.get_dvd_device()
retcode = shellutil.run("cdcontrol -f {0} eject".format(dvd))
if chk_err and retcode != 0:
raise OSUtilError("Failed to eject dvd: ret={0}".format(retcode))
def restart_if(self, ifname, retries=None, wait=None):
# Restart dhclient only to publish hostname
shellutil.run("/etc/rc.d/dhclient restart {0}".format(ifname), chk_err=False)
def get_total_mem(self):
cmd = "sysctl hw.physmem |awk '{print $2}'"
ret, output = shellutil.run_get_output(cmd)
if ret:
raise OSUtilError("Failed to get total memory: {0}".format(output))
try:
return int(output) / 1024 / 1024
except ValueError:
raise OSUtilError("Failed to get total memory: {0}".format(output))
def get_processor_cores(self):
ret, output = shellutil.run_get_output("sysctl hw.ncpu |awk '{print $2}'")
if ret:
raise OSUtilError("Failed to get processor cores.")
try:
return int(output)
except ValueError:
raise OSUtilError("Failed to get processor cores: {0}".format(output))
def set_scsi_disks_timeout(self, timeout):
if self._scsi_disks_timeout_set:
return
ret, output = shellutil.run_get_output('sysctl kern.cam.da.default_timeout={0}'.format(timeout))
if ret:
raise OSUtilError("Failed set SCSI disks timeout: {0}".format(output))
self._scsi_disks_timeout_set = True
def check_pid_alive(self, pid):
return shellutil.run('ps -p {0}'.format(pid), chk_err=False) == 0
@staticmethod
def _get_net_info():
"""
There is no SIOCGIFCONF
on FreeBSD - just parse ifconfig.
Returns strings: iface, inet4_addr, and mac;
raises OSUtilError if the ether interface cannot be found or queried.
Callers may sleep and retry, as the network must be up.
"""
iface = ''
inet = ''
mac = ''
err, output = shellutil.run_get_output('ifconfig -l ether', chk_err=False)
if err:
raise OSUtilError("Can't find ether interface:{0}".format(output))
ifaces = output.split()
if not ifaces:
raise OSUtilError("Can't find ether interface.")
iface = ifaces[0]
err, output = shellutil.run_get_output('ifconfig ' + iface, chk_err=False)
if err:
raise OSUtilError("Can't get info for interface:{0}".format(iface))
for line in output.split('\n'):
if line.find('inet ') != -1:
inet = line.split()[1]
elif line.find('ether ') != -1:
mac = line.split()[1]
logger.verbose("Interface info: ({0},{1},{2})", iface, inet, mac)
return iface, inet, mac
def device_for_ide_port(self, port_id):
"""
Return device name attached to ide port 'n'.
"""
if port_id > 3:
return None
g0 = "00000000"
if port_id > 1:
g0 = "00000001"
port_id = port_id - 2
err, output = shellutil.run_get_output('sysctl dev.storvsc | grep pnpinfo | grep deviceid=')
if err:
return None
g1 = "000" + ustr(port_id)
g0g1 = "{0}-{1}".format(g0, g1)
# pylint: disable=W0105
"""
search 'X' from 'dev.storvsc.X.%pnpinfo: classid=32412632-86cb-44a2-9b5c-50d1417354f5 deviceid=00000000-0001-8899-0000-000000000000'
"""
# pylint: enable=W0105
cmd_search_ide = "sysctl dev.storvsc | grep pnpinfo | grep deviceid={0}".format(g0g1)
err, output = shellutil.run_get_output(cmd_search_ide)
if err:
return None
cmd_extract_id = cmd_search_ide + "|awk -F . '{print $3}'"
err, output = shellutil.run_get_output(cmd_extract_id)
# pylint: disable=W0105
"""
try to search 'blkvscX' and 'storvscX' to find device name
"""
# pylint: enable=W0105
output = output.rstrip()
cmd_search_blkvsc = "camcontrol devlist -b | grep blkvsc{0} | awk '{{print $1}}'".format(output)
err, output = shellutil.run_get_output(cmd_search_blkvsc)
if err == 0:
output = output.rstrip()
cmd_search_dev = "camcontrol devlist | grep {0} | awk -F \( '{{print $2}}'|sed -e 's/.*(//'| sed -e 's/).*//'".format(output) # pylint: disable=W1401
err, output = shellutil.run_get_output(cmd_search_dev)
if err == 0:
for possible in output.rstrip().split(','):
if not possible.startswith('pass'):
return possible
cmd_search_storvsc = "camcontrol devlist -b | grep storvsc{0} | awk '{{print $1}}'".format(output)
err, output = shellutil.run_get_output(cmd_search_storvsc)
if err == 0:
output = output.rstrip()
cmd_search_dev = "camcontrol devlist | grep {0} | awk -F \( '{{print $2}}'|sed -e 's/.*(//'| sed -e 's/).*//'".format(output) # pylint: disable=W1401
err, output = shellutil.run_get_output(cmd_search_dev)
if err == 0:
for possible in output.rstrip().split(','):
if not possible.startswith('pass'):
return possible
return None
@staticmethod
def get_total_cpu_ticks_since_boot():
return 0
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/gaia.py 0000664 0000000 0000000 00000016761 14626177470 0025052 0 ustar 00root root 0000000 0000000 #
# Copyright 2017 Check Point Software Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import base64
import socket
import struct
import time
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common.exception import OSUtilError
from azurelinuxagent.common.future import ustr, bytebuffer, range, int # pylint: disable=redefined-builtin
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.osutil.default import DefaultOSUtil
from azurelinuxagent.common.utils.cryptutil import CryptUtil
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.utils.textutil as textutil
class GaiaOSUtil(DefaultOSUtil):
def __init__(self): # pylint: disable=W0235
super(GaiaOSUtil, self).__init__()
def _run_clish(self, cmd):
ret = 0
out = ""
for i in range(10): # pylint: disable=W0612
try:
final_command = ["/bin/clish", "-s", "-c", "'{0}'".format(cmd)]
out = shellutil.run_command(final_command, log_error=True)
ret = 0
break
except shellutil.CommandError as e:
ret = e.returncode
out = e.stdout
except Exception as e:
ret = -1
out = ustr(e)
if 'NMSHST0025' in out: # Entry for [hostname] already present
ret = 0
break
time.sleep(2)
return ret, out
def useradd(self, username, expiration=None, comment=None):
logger.warn('useradd is not supported on GAiA')
def chpasswd(self, username, password, crypt_id=6, salt_len=10):
logger.info('chpasswd')
passwd_hash = textutil.gen_password_hash(password, crypt_id, salt_len)
ret, out = self._run_clish(
'set user admin password-hash ' + passwd_hash)
if ret != 0:
raise OSUtilError(("Failed to set password for {0}: {1}"
"").format('admin', out))
def conf_sudoer(self, username, nopasswd=False, remove=False):
logger.info('conf_sudoer is not supported on GAiA')
def del_root_password(self):
logger.info('del_root_password')
ret, out = self._run_clish('set user admin password-hash *LOCK*') # pylint: disable=W0612
if ret != 0:
raise OSUtilError("Failed to delete root password")
def _replace_user(self, path, username):
if path.startswith('$HOME'):
path = '/home' + path[5:]
parts = path.split('/')
parts[2] = username
return '/'.join(parts)
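# Worked example, assuming the usual Azure-provided key path layout:
#   self._replace_user('$HOME/someuser/.ssh/authorized_keys', 'admin')
#   # -> '/home/admin/.ssh/authorized_keys'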
def deploy_ssh_keypair(self, username, keypair):
logger.info('deploy_ssh_keypair')
username = 'admin'
path, thumbprint = keypair
path = self._replace_user(path, username)
super(GaiaOSUtil, self).deploy_ssh_keypair(
username, (path, thumbprint))
def openssl_to_openssh(self, input_file, output_file):
cryptutil = CryptUtil(conf.get_openssl_cmd())
ret, out = shellutil.run_get_output(
conf.get_openssl_cmd() +
" rsa -pubin -noout -text -in '" + input_file + "'")
if ret != 0:
raise OSUtilError('openssl failed with {0}'.format(ret))
modulus = []
exponent = []
buf = None
for line in out.split('\n'):
if line.startswith('Modulus:'):
buf = modulus
buf.append(line)
continue
if line.startswith('Exponent:'):
buf = exponent
buf.append(line)
continue
if buf and line:
buf.append(line.strip().replace(':', ''))
def text_to_num(buf):
if len(buf) == 1:
return int(buf[0].split()[1])
return int(''.join(buf[1:]), 16)
n = text_to_num(modulus)
e = text_to_num(exponent)
keydata = bytearray()
keydata.extend(struct.pack('>I', len('ssh-rsa')))
keydata.extend(b'ssh-rsa')
keydata.extend(struct.pack('>I', len(cryptutil.num_to_bytes(e))))
keydata.extend(cryptutil.num_to_bytes(e))
keydata.extend(struct.pack('>I', len(cryptutil.num_to_bytes(n)) + 1))
keydata.extend(b'\0')
keydata.extend(cryptutil.num_to_bytes(n))
keydata_base64 = base64.b64encode(bytebuffer(keydata))
fileutil.write_file(output_file,
ustr(b'ssh-rsa ' + keydata_base64 + b'\n',
encoding='utf-8'))
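# The parser above consumes the textual form produced by
# "openssl rsa -pubin -noout -text", which looks roughly like this
# (illustrative, truncated):
#   Modulus:
#       00:c4:1f:...
#   Exponent: 65537 (0x10001)
# text_to_num() reassembles the colon-separated modulus bytes and reads the
# decimal exponent, after which the key is re-encoded in the binary ssh-rsa
# wire format (length-prefixed "ssh-rsa", exponent, modulus).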
def deploy_ssh_pubkey(self, username, pubkey):
logger.info('deploy_ssh_pubkey')
username = 'admin'
path, thumbprint, value = pubkey
path = self._replace_user(path, username)
super(GaiaOSUtil, self).deploy_ssh_pubkey(
username, (path, thumbprint, value))
def eject_dvd(self, chk_err=True):
logger.warn('eject is not supported on GAiA')
def mount(self, device, mount_point, option=None, chk_err=True):
if not option:
option = []
if any('udf,iso9660' in opt for opt in option):
ret, out = super(GaiaOSUtil, self).mount(device, mount_point,
option=[opt.replace('udf,iso9660', 'udf') for opt in option],
chk_err=chk_err)
if not ret:
return ret, out
return super(GaiaOSUtil, self).mount(
device, mount_point, option=option, chk_err=chk_err)
def allow_dhcp_broadcast(self):
logger.info('allow_dhcp_broadcast is ignored on GAiA')
def remove_rules_files(self, rules_files=''):
pass
def restore_rules_files(self, rules_files=''):
logger.info('restore_rules_files is ignored on GAiA')
def restart_ssh_service(self):
return shellutil.run('/sbin/service sshd condrestart', chk_err=False)
def _address_to_string(self, addr):
return socket.inet_ntoa(struct.pack("!I", addr))
def _get_prefix(self, mask):
return str(sum([bin(int(x)).count('1') for x in mask.split('.')]))
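# Worked examples for the two helpers above:
#   self._address_to_string(0x0A000008)    # -> "10.0.0.8"
#   self._get_prefix("255.255.255.0")      # -> "24"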
def route_add(self, net, mask, gateway):
logger.info('route_add {0} {1} {2}', net, mask, gateway)
if net == 0 and mask == 0:
cidr = 'default'
else:
cidr = self._address_to_string(net) + '/' + self._get_prefix(
self._address_to_string(mask))
ret, out = self._run_clish( # pylint: disable=W0612
'set static-route ' + cidr +
' nexthop gateway address ' +
self._address_to_string(gateway) + ' on')
return ret
def set_hostname(self, hostname):
logger.warn('set_hostname is ignored on GAiA')
def set_dhcp_hostname(self, hostname):
logger.warn('set_dhcp_hostname is ignored on GAiA')
def publish_hostname(self, hostname, recover_nic=False):
logger.warn('publish_hostname is ignored on GAiA')
def del_account(self, username):
logger.warn('del_account is ignored on GAiA')
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/iosxe.py 0000664 0000000 0000000 00000006746 14626177470 0025302 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil.default import DefaultOSUtil, PRODUCT_ID_FILE, DMIDECODE_CMD, UUID_PATTERN
from azurelinuxagent.common.utils import textutil, fileutil # pylint: disable=W0611
# pylint: disable=W0105
'''
The IOSXE distribution is a variant of the Centos distribution,
version 7.1.
The primary difference is that IOSXE makes some assumptions about
the waagent environment:
- only the waagent daemon is executed
- no provisioning is performed
- no DHCP-based services are available
'''
# pylint: enable=W0105
class IosxeOSUtil(DefaultOSUtil):
def __init__(self): # pylint: disable=W0235
super(IosxeOSUtil, self).__init__()
@staticmethod
def get_systemd_unit_file_install_path():
return "/usr/lib/systemd/system"
def set_hostname(self, hostname):
"""
Unlike RedHat 6.x, RedHat 7.x sets the hostname via hostnamectl.
Due to a bug in systemd on CentOS 7.0, if this call fails, fall back
to the hostname command.
"""
hostnamectl_cmd = ["hostnamectl", "set-hostname", hostname, "--static"]
try:
shellutil.run_command(hostnamectl_cmd)
except Exception as e:
logger.warn("[{0}] failed with error: {1}, attempting fallback".format(' '.join(hostnamectl_cmd), ustr(e)))
DefaultOSUtil.set_hostname(self, hostname)
def publish_hostname(self, hostname, recover_nic=False):
"""
Restart NetworkManager first before publishing hostname
"""
shellutil.run("service NetworkManager restart")
super(IosxeOSUtil, self).publish_hostname(hostname, recover_nic)
def register_agent_service(self):
return shellutil.run("systemctl enable waagent", chk_err=False)
def unregister_agent_service(self):
return shellutil.run("systemctl disable waagent", chk_err=False)
def openssl_to_openssh(self, input_file, output_file):
DefaultOSUtil.openssl_to_openssh(self, input_file, output_file)
def is_dhcp_available(self):
return False
def get_instance_id(self):
'''
Azure records a UUID as the instance ID.
First check /sys/class/dmi/id/product_uuid.
If that is missing, then extract it from dmidecode.
If nothing works (for old VMs), return the empty string.
'''
if os.path.isfile(PRODUCT_ID_FILE):
try:
s = fileutil.read_file(PRODUCT_ID_FILE).strip()
return self._correct_instance_id(s.strip())
except IOError:
pass
rc, s = shellutil.run_get_output(DMIDECODE_CMD)
if rc != 0 or UUID_PATTERN.match(s) is None:
return ""
return self._correct_instance_id(s.strip())
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/mariner.py 0000664 0000000 0000000 00000004715 14626177470 0025602 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class MarinerOSUtil(DefaultOSUtil):
def __init__(self):
super(MarinerOSUtil, self).__init__()
self.jit_enabled = True
@staticmethod
def get_systemd_unit_file_install_path():
return "/usr/lib/systemd/system"
@staticmethod
def get_agent_bin_path():
return "/usr/bin"
def is_dhcp_enabled(self):
return True
def start_network(self):
self._run_command_without_raising(["systemctl", "start", "systemd-networkd"], log_error=False)
def restart_if(self, ifname=None, retries=None, wait=None):
self._run_command_without_raising(["systemctl", "restart", "systemd-networkd"])
def restart_ssh_service(self):
self._run_command_without_raising(["systemctl", "restart", "sshd"])
def stop_dhcp_service(self):
self._run_command_without_raising(["systemctl", "stop", "systemd-networkd"], log_error=False)
def start_dhcp_service(self):
self._run_command_without_raising(["systemctl", "start", "systemd-networkd"], log_error=False)
def start_agent_service(self):
self._run_command_without_raising(["systemctl", "start", "{0}".format(self.service_name)], log_error=False)
def stop_agent_service(self):
self._run_command_without_raising(["systemctl", "stop", "{0}".format(self.service_name)], log_error=False)
def register_agent_service(self):
self._run_command_without_raising(["systemctl", "enable", "{0}".format(self.service_name)], log_error=False)
def unregister_agent_service(self):
self._run_command_without_raising(["systemctl", "disable", "{0}".format(self.service_name)], log_error=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "systemd-networkd"])
def conf_sshd(self, disable_password):
pass
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/nsbsd.py 0000664 0000000 0000000 00000013620 14626177470 0025251 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Stormshield
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.exception import OSUtilError
from azurelinuxagent.common.osutil.freebsd import FreeBSDOSUtil
class NSBSDOSUtil(FreeBSDOSUtil):
resolver = None
def __init__(self):
super(NSBSDOSUtil, self).__init__()
self.agent_conf_file_path = '/etc/waagent.conf'
if self.resolver is None:
# NSBSD doesn't have a system resolver; configure a Python one
try:
import dns.resolver
except ImportError:
raise OSUtilError("Python DNS resolver not available. Cannot proceed!")
self.resolver = dns.resolver.Resolver(configure=False)
servers = []
cmd = "getconf /usr/Firewall/ConfigFiles/dns Servers | tail -n +2"
ret, output = shellutil.run_get_output(cmd) # pylint: disable=W0612
for server in output.split("\n"):
if server == '':
break
server = server[:-1] # remove last '='
cmd = "grep '{}' /etc/hosts".format(server) + " | awk '{print $1}'"
ret, ip = shellutil.run_get_output(cmd)
ip = ip.strip() # Remove new line char
servers.append(ip)
self.resolver.nameservers = servers
dns.resolver.override_system_resolver(self.resolver)
def set_hostname(self, hostname):
self._run_command_without_raising(
['/usr/Firewall/sbin/setconf', '/usr/Firewall/System/global', 'SystemName', hostname])
self._run_command_without_raising(["/usr/Firewall/sbin/enlog"])
self._run_command_without_raising(["/usr/Firewall/sbin/enproxy", "-u"])
self._run_command_without_raising(["/usr/Firewall/sbin/ensl", "-u"])
self._run_command_without_raising(["/usr/Firewall/sbin/ennetwork", "-f"])
def restart_ssh_service(self):
return shellutil.run('/usr/Firewall/sbin/enservice', chk_err=False)
def conf_sshd(self, disable_password):
option = "0" if disable_password else "1"
shellutil.run('setconf /usr/Firewall/ConfigFiles/system SSH State 1',
chk_err=False)
shellutil.run('setconf /usr/Firewall/ConfigFiles/system SSH Password {}'.format(option),
chk_err=False)
shellutil.run('enservice', chk_err=False)
logger.info("{0} SSH password-based authentication methods."
.format("Disabled" if disable_password else "Enabled"))
def get_root_username(self):
return "admin"
def useradd(self, username, expiration=None, comment=None):
"""
Create user account with 'username'
"""
logger.warn("User creation disabled")
def del_account(self, username):
logger.warn("User deletion disabled")
def conf_sudoer(self, username, nopasswd=False, remove=False):
logger.warn("Sudo is not enabled")
def chpasswd(self, username, password, crypt_id=6, salt_len=10):
self._run_command_raising_OSUtilError(["/usr/Firewall/sbin/fwpasswd", "-p", password],
err_msg="Failed to set password for admin")
# password set, activate webadmin and ssh access
commands = [['setconf', '/usr/Firewall/ConfigFiles/webadmin', 'ACL', 'any'], ['ensl']]
self._run_multiple_commands_without_raising(commands, log_error=False, continue_on_error=False)
def deploy_ssh_pubkey(self, username, pubkey):
"""
Deploy authorized_key
"""
path, thumbprint, value = pubkey # pylint: disable=W0612
# override parameters
super(NSBSDOSUtil, self).deploy_ssh_pubkey('admin',
["/usr/Firewall/.ssh/authorized_keys", thumbprint, value])
def del_root_password(self):
logger.warn("Root password deletion disabled")
def start_dhcp_service(self):
shellutil.run("/usr/Firewall/sbin/nstart dhclient", chk_err=False)
def stop_dhcp_service(self):
shellutil.run("/usr/Firewall/sbin/nstop dhclient", chk_err=False)
def get_dhcp_pid(self):
ret = ""
pidfile = "/var/run/dhclient.pid"
if os.path.isfile(pidfile):
ret = fileutil.read_file(pidfile, encoding='ascii')
return self._text_to_pid_list(ret)
def eject_dvd(self, chk_err=True):
pass
def restart_if(self, ifname=None, retries=None, wait=None):
# Restart dhclient only to publish hostname
shellutil.run("ennetwork", chk_err=False)
def set_dhcp_hostname(self, hostname):
# already done by the dhcp client
pass
def get_firewall_dropped_packets(self, dst_ip=None):
# disable iptables methods
return 0
def get_firewall_will_wait(self):
# disable iptables methods
return ""
def _delete_rule(self, rule):
# disable iptables methods
return
def remove_firewall(self, dst_ip=None, uid=None, wait=""):
# disable iptables methods
return True
def enable_firewall(self, dst_ip=None, uid=None):
# disable iptables methods
return True, True
def get_firewall_list(self, wait=""):
# disable iptables methods
return ""
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/openbsd.py 0000664 0000000 0000000 00000032504 14626177470 0025574 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
# Copyright 2017 Reyk Floeter
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and OpenSSL 1.0+
import os
import re
import time
import glob
import datetime
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common.exception import OSUtilError
from azurelinuxagent.common.osutil.default import DefaultOSUtil
UUID_PATTERN = re.compile(
r'^\s*[A-F0-9]{8}(?:\-[A-F0-9]{4}){3}\-[A-F0-9]{12}\s*$',
re.IGNORECASE)
class OpenBSDOSUtil(DefaultOSUtil):
def __init__(self):
super(OpenBSDOSUtil, self).__init__()
self.jit_enabled = True
self._scsi_disks_timeout_set = False
@staticmethod
def get_agent_bin_path():
return "/usr/local/sbin"
def get_instance_id(self):
ret, output = shellutil.run_get_output("sysctl -n hw.uuid")
if ret != 0 or UUID_PATTERN.match(output) is None:
return ""
return output.strip()
def set_hostname(self, hostname):
fileutil.write_file("/etc/myname", "{}\n".format(hostname))
self._run_command_without_raising(["hostname", hostname], log_error=False)
def restart_ssh_service(self):
return shellutil.run('rcctl restart sshd', chk_err=False)
def start_agent_service(self):
return shellutil.run('rcctl start {0}'.format(self.service_name), chk_err=False)
def stop_agent_service(self):
return shellutil.run('rcctl stop {0}'.format(self.service_name), chk_err=False)
def register_agent_service(self):
shellutil.run('chmod 0555 /etc/rc.d/{0}'.format(self.service_name), chk_err=False)
return shellutil.run('rcctl enable {0}'.format(self.service_name), chk_err=False)
def unregister_agent_service(self):
return shellutil.run('rcctl disable {0}'.format(self.service_name), chk_err=False)
def del_account(self, username):
if self.is_sys_user(username):
logger.error("{0} is a system user. Will not delete it.", username)
self._run_command_without_raising(["touch", "/var/run/utmp"])
self._run_command_without_raising(["userdel", "-r", username])
self.conf_sudoer(username, remove=True)
def conf_sudoer(self, username, nopasswd=False, remove=False):
doas_conf = "/etc/doas.conf"
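# Illustrative sketch of the resulting /etc/doas.conf after this method runs
# (the username 'azureuser' is hypothetical):
#
#     permit keepenv nopass root
#     permit keepenv persist azureuser
#
# With nopasswd=True the second line would instead read
# 'permit keepenv nopass azureuser'.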
doas = None
if not remove:
if not os.path.isfile(doas_conf):
# always allow root to become root
doas = "permit keepenv nopass root\n"
fileutil.append_file(doas_conf, doas)
if nopasswd:
doas = "permit keepenv nopass {0}\n".format(username)
else:
doas = "permit keepenv persist {0}\n".format(username)
fileutil.append_file(doas_conf, doas)
fileutil.chmod(doas_conf, 0o644)
else:
# Remove user from doas.conf
if os.path.isfile(doas_conf):
try:
content = fileutil.read_file(doas_conf)
doas = content.split("\n")
doas = [x for x in doas if username not in x]
fileutil.write_file(doas_conf, "\n".join(doas))
except IOError as err:
raise OSUtilError("Failed to remove sudoer: "
"{0}".format(err))
def chpasswd(self, username, password, crypt_id=6, salt_len=10):
if self.is_sys_user(username):
raise OSUtilError(("User {0} is a system user. "
"Will not set passwd.").format(username))
output = self._run_command_raising_OSUtilError(['encrypt'], cmd_input=password,
err_msg="Failed to encrypt password for {0}".format(username))
passwd_hash = output.strip()
self._run_command_raising_OSUtilError(['usermod', '-p', passwd_hash, username],
err_msg="Failed to set password for {0}".format(username))
def del_root_password(self):
ret, output = shellutil.run_get_output('usermod -p "*" root')
if ret:
raise OSUtilError("Failed to delete root password: "
"{0}".format(output))
def get_if_mac(self, ifname):
data = self._get_net_info()
if data[0] == ifname:
return data[2].replace(':', '').upper()
return None
def get_first_if(self):
return self._get_net_info()[:2]
def route_add(self, net, mask, gateway):
cmd = 'route add {0} {1} {2}'.format(net, gateway, mask)
return shellutil.run(cmd, chk_err=False)
def is_missing_default_route(self):
ret = shellutil.run("route -n get default", chk_err=False)
if ret == 0:
return False
return True
def is_dhcp_enabled(self):
pass
def start_dhcp_service(self):
pass
def stop_dhcp_service(self):
pass
def get_dhcp_lease_endpoint(self):
"""
OpenBSD has a slightly different lease file format.
"""
endpoint = None
pathglob = '/var/db/dhclient.leases.{}'.format(self.get_first_if()[0])
HEADER_LEASE = "lease"
HEADER_OPTION = "option option-245"
HEADER_EXPIRE = "expire"
FOOTER_LEASE = "}"
FORMAT_DATETIME = "%Y/%m/%d %H:%M:%S %Z"
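# Illustrative dhclient lease entry this parser expects (all field values are
# hypothetical; the hex quad a8:3f:81:10 decodes to 168.63.129.16):
#
#     lease {
#       option option-245 a8:3f:81:10;
#       expire 3 2024/05/01 12:00:00 UTC;
#     }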
logger.info("looking for leases in path [{0}]".format(pathglob))
for lease_file in glob.glob(pathglob):
leases = open(lease_file).read()
if HEADER_OPTION in leases:
cached_endpoint = None
has_option_245 = False
expired = True # assume expired
for line in leases.splitlines():
if line.startswith(HEADER_LEASE):
cached_endpoint = None
has_option_245 = False
expired = True
elif HEADER_OPTION in line:
try:
ipaddr = line.split(" ")[-1].strip(";").split(":")
cached_endpoint = \
".".join(str(int(d, 16)) for d in ipaddr)
has_option_245 = True
except ValueError:
logger.error("could not parse '{0}'".format(line))
elif HEADER_EXPIRE in line:
if "never" in line:
expired = False
else:
try:
expire_string = line.split(
" ", 4)[-1].strip(";")
expire_date = datetime.datetime.strptime(
expire_string, FORMAT_DATETIME)
if expire_date > datetime.datetime.utcnow():
expired = False
except ValueError:
logger.error("could not parse expiry token "
"'{0}'".format(line))
elif FOOTER_LEASE in line:
logger.info("dhcp entry:{0}, 245:{1}, expired: {2}"
.format(cached_endpoint, has_option_245, expired))
if not expired and cached_endpoint is not None and has_option_245:
endpoint = cached_endpoint
logger.info("found endpoint [{0}]".format(endpoint))
# we want to return the last valid entry, so
# keep searching
if endpoint is not None:
logger.info("cached endpoint found [{0}]".format(endpoint))
else:
logger.info("cached endpoint not found")
return endpoint
def allow_dhcp_broadcast(self):
pass
def set_route_for_dhcp_broadcast(self, ifname):
return shellutil.run("route add 255.255.255.255 -iface "
"{0}".format(ifname), chk_err=False)
def remove_route_for_dhcp_broadcast(self, ifname):
shellutil.run("route delete 255.255.255.255 -iface "
"{0}".format(ifname), chk_err=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pgrep", "-n", "dhclient"])
def get_dvd_device(self, dev_dir='/dev'):
pattern = r'cd[0-9]c'
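# Illustrative match: a device node named 'cd0c' yields '/dev/cd0c'.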
for dvd in [re.match(pattern, dev) for dev in os.listdir(dev_dir)]:
if dvd is not None:
return "/dev/{0}".format(dvd.group(0))
raise OSUtilError("Failed to get DVD device")
def mount_dvd(self,
max_retry=6,
chk_err=True,
dvd_device=None,
mount_point=None,
sleep_time=5):
if dvd_device is None:
dvd_device = self.get_dvd_device()
if mount_point is None:
mount_point = conf.get_dvd_mount_point()
if not os.path.isdir(mount_point):
os.makedirs(mount_point)
for retry in range(0, max_retry):
retcode = self.mount(dvd_device,
mount_point,
option=["-o", "ro", "-t", "udf"],
chk_err=False)
if retcode == 0:
logger.info("Successfully mounted DVD")
return
if retry < max_retry - 1:
mountlist = shellutil.run_get_output("/sbin/mount")[1]
existing = self.get_mount_point(mountlist, dvd_device)
if existing is not None:
logger.info("{0} is mounted at {1}", dvd_device, existing)
return
logger.warn("Mount DVD failed: retry={0}, ret={1}", retry,
retcode)
time.sleep(sleep_time)
if chk_err:
raise OSUtilError("Failed to mount DVD.")
def eject_dvd(self, chk_err=True):
dvd = self.get_dvd_device()
retcode = shellutil.run("cdio eject {0}".format(dvd))
if chk_err and retcode != 0:
raise OSUtilError("Failed to eject DVD: ret={0}".format(retcode))
def restart_if(self, ifname, retries=3, wait=5):
# Restart dhclient only to publish hostname
shellutil.run("/sbin/dhclient {0}".format(ifname), chk_err=False)
def get_total_mem(self):
ret, output = shellutil.run_get_output("sysctl -n hw.physmem")
if ret:
raise OSUtilError("Failed to get total memory: {0}".format(output))
try:
return int(output)/1024/1024
except ValueError:
raise OSUtilError("Failed to get total memory: {0}".format(output))
def get_processor_cores(self):
ret, output = shellutil.run_get_output("sysctl -n hw.ncpu")
if ret:
raise OSUtilError("Failed to get processor cores.")
try:
return int(output)
except ValueError:
raise OSUtilError("Failed to get total memory: {0}".format(output))
def set_scsi_disks_timeout(self, timeout):
pass
def check_pid_alive(self, pid): # pylint: disable=R1710
if not pid:
return
return shellutil.run('ps -p {0}'.format(pid), chk_err=False) == 0
@staticmethod
def _get_net_info():
"""
There is no SIOCGIFCONF
on OpenBSD - just parse ifconfig.
Returns strings: iface, inet4_addr, and mac
or 'None,None,None' if unable to parse.
We will sleep and retry as the network must be up.
"""
iface = ''
inet = ''
mac = ''
ret, output = shellutil.run_get_output(
'ifconfig hvn | grep -E "^hvn.:" | sed "s/:.*//g"', chk_err=False)
if ret:
raise OSUtilError("Can't find ether interface:{0}".format(output))
ifaces = output.split()
if not ifaces:
raise OSUtilError("Can't find ether interface.")
iface = ifaces[0]
ret, output = shellutil.run_get_output(
'ifconfig ' + iface, chk_err=False)
if ret:
raise OSUtilError("Can't get info for interface:{0}".format(iface))
for line in output.split('\n'):
if line.find('inet ') != -1:
inet = line.split()[1]
elif line.find('lladdr ') != -1:
mac = line.split()[1]
logger.verbose("Interface info: ({0},{1},{2})", iface, inet, mac)
return iface, inet, mac
def device_for_ide_port(self, port_id):
"""
Return device name attached to ide port 'n'.
"""
return "wd{0}".format(port_id)
@staticmethod
def get_total_cpu_ticks_since_boot():
return 0
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/openwrt.py 0000664 0000000 0000000 00000013620 14626177470 0025636 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
# Copyright 2018 Sonus Networks, Inc. (d.b.a. Ribbon Communications Operating Company)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import re
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.utils.fileutil as fileutil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
from azurelinuxagent.common.utils.networkutil import NetworkInterfaceCard
class OpenWRTOSUtil(DefaultOSUtil):
def __init__(self):
super(OpenWRTOSUtil, self).__init__()
self.agent_conf_file_path = '/etc/waagent.conf'
self.dhclient_name = 'udhcpc'
self.ip_command_output = re.compile('^\d+:\s+(\w+):\s+(.*)$') # pylint: disable=W1401
self.jit_enabled = True
def eject_dvd(self, chk_err=True):
logger.warn('eject is not supported on OpenWRT')
def useradd(self, username, expiration=None, comment=None):
"""
Create user account with 'username'
"""
userentry = self.get_userentry(username)
if userentry is not None:
logger.info("User {0} already exists, skip useradd", username)
return
if expiration is not None:
cmd = ["useradd", "-m", username, "-s", "/bin/ash", "-e", expiration]
else:
cmd = ["useradd", "-m", username, "-s", "/bin/ash"]
if not os.path.exists("/home"):
os.mkdir("/home")
if comment is not None:
cmd.extend(["-c", comment])
self._run_command_raising_OSUtilError(cmd, err_msg="Failed to create user account:{0}".format(username))
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", self.dhclient_name])
def get_nic_state(self, as_string=False):
"""
Capture NIC state (IPv4 and IPv6 addresses plus link state).
:return: Dictionary of NIC state objects, with the NIC name as key
:rtype: dict(str,NetworkInformationCard)
"""
if as_string: # as_string is not supported on OpenWRT
return ''
state = {}
status, output = shellutil.run_get_output("ip -o link", chk_err=False, log_cmd=False)
if status != 0:
logger.verbose("Could not fetch NIC link info; status {0}, {1}".format(status, output))
return {}
for entry in output.splitlines():
result = self.ip_command_output.match(entry)
if result:
name = result.group(1)
state[name] = NetworkInterfaceCard(name, result.group(2))
self._update_nic_state(state, "ip -o -f inet address", NetworkInterfaceCard.add_ipv4, "an IPv4 address")
self._update_nic_state(state, "ip -o -f inet6 address", NetworkInterfaceCard.add_ipv6, "an IPv6 address")
return state
def _update_nic_state(self, state, ip_command, handler, description):
"""
Update the state of NICs based on the output of a specified ip subcommand.
:param dict(str, NetworkInterfaceCard) state: Dictionary of NIC state objects
:param str ip_command: The ip command to run
:param handler: A method on the NetworkInterfaceCard class
:param str description: Description of the particular information being added to the state
"""
status, output = shellutil.run_get_output(ip_command, chk_err=True)
if status != 0:
return
for entry in output.splitlines():
result = self.ip_command_output.match(entry)
if result:
interface_name = result.group(1)
if interface_name in state:
handler(state[interface_name], result.group(2))
else:
logger.error("Interface {0} has {1} but no link state".format(interface_name, description))
def is_dhcp_enabled(self):
pass
def start_dhcp_service(self):
pass
def stop_dhcp_service(self):
pass
def start_network(self) :
return shellutil.run("/etc/init.d/network start", chk_err=True)
def restart_ssh_service(self): # pylint: disable=R1710
# Since Dropbear is the default SSH server on OpenWRT, let's do a sanity check
if os.path.exists("/etc/init.d/sshd"):
return shellutil.run("/etc/init.d/sshd restart", chk_err=True)
else:
logger.warn("sshd service does not exists")
def stop_agent_service(self):
return shellutil.run("/etc/init.d/{0} stop".format(self.service_name), chk_err=True)
def start_agent_service(self):
return shellutil.run("/etc/init.d/{0} start".format(self.service_name), chk_err=True)
def register_agent_service(self):
return shellutil.run("/etc/init.d/{0} enable".format(self.service_name), chk_err=True)
def unregister_agent_service(self):
return shellutil.run("/etc/init.d/{0} disable".format(self.service_name), chk_err=True)
def set_hostname(self, hostname):
fileutil.write_file('/etc/hostname', hostname)
commands = [['uci', 'set', 'system.@system[0].hostname={0}'.format(hostname)], ['uci', 'commit', 'system'],
['/etc/init.d/system', 'reload']]
self._run_multiple_commands_without_raising(commands, log_error=False, continue_on_error=False)
def remove_rules_files(self, rules_files=""):
pass
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/photonos.py 0000664 0000000 0000000 00000004016 14626177470 0026010 0 ustar 00root root 0000000 0000000 #
# Copyright 2021 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class PhotonOSUtil(DefaultOSUtil):
def __init__(self):
super(PhotonOSUtil, self).__init__()
self.agent_conf_file_path = '/etc/waagent.conf'
@staticmethod
def get_systemd_unit_file_install_path():
return '/usr/lib/systemd/system'
@staticmethod
def get_agent_bin_path():
return '/usr/bin'
def is_dhcp_enabled(self):
return True
def start_network(self) :
return shellutil.run('systemctl start systemd-networkd', chk_err=False)
def restart_if(self, ifname=None, retries=None, wait=None):
shellutil.run('systemctl restart systemd-networkd')
def restart_ssh_service(self):
shellutil.run('systemctl restart sshd')
def stop_dhcp_service(self):
return shellutil.run('systemctl stop systemd-networkd', chk_err=False)
def start_dhcp_service(self):
return shellutil.run('systemctl start systemd-networkd', chk_err=False)
def start_agent_service(self):
return shellutil.run('systemctl start waagent', chk_err=False)
def stop_agent_service(self):
return shellutil.run('systemctl stop waagent', chk_err=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(['pidof', 'systemd-networkd'])
def conf_sshd(self, disable_password):
pass
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/redhat.py 0000664 0000000 0000000 00000032661 14626177470 0025415 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os # pylint: disable=W0611
import re # pylint: disable=W0611
import pwd # pylint: disable=W0611
import shutil # pylint: disable=W0611
import socket # pylint: disable=W0611
import array # pylint: disable=W0611
import struct # pylint: disable=W0611
import fcntl # pylint: disable=W0611
import time # pylint: disable=W0611
import base64 # pylint: disable=W0611
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.future import ustr, bytebuffer # pylint: disable=W0611
from azurelinuxagent.common.exception import OSUtilError, CryptError
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.utils.textutil as textutil # pylint: disable=W0611
from azurelinuxagent.common.utils.cryptutil import CryptUtil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class Redhat6xOSUtil(DefaultOSUtil):
def __init__(self):
super(Redhat6xOSUtil, self).__init__()
self.jit_enabled = True
def start_network(self):
return shellutil.run("/sbin/service networking start", chk_err=False)
def restart_ssh_service(self):
return shellutil.run("/sbin/service sshd condrestart", chk_err=False)
def stop_agent_service(self):
return shellutil.run("/sbin/service {0} stop".format(self.service_name), chk_err=False)
def start_agent_service(self):
return shellutil.run("/sbin/service {0} start".format(self.service_name), chk_err=False)
def register_agent_service(self):
return shellutil.run("chkconfig --add {0}".format(self.service_name), chk_err=False)
def unregister_agent_service(self):
return shellutil.run("chkconfig --del {0}".format(self.service_name), chk_err=False)
def openssl_to_openssh(self, input_file, output_file):
pubkey = fileutil.read_file(input_file)
try:
cryptutil = CryptUtil(conf.get_openssl_cmd())
ssh_rsa_pubkey = cryptutil.asn1_to_ssh(pubkey)
except CryptError as e:
raise OSUtilError(ustr(e))
fileutil.append_file(output_file, ssh_rsa_pubkey)
# Override
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "dhclient"])
def set_hostname(self, hostname):
"""
Set /etc/sysconfig/network
"""
fileutil.update_conf_file('/etc/sysconfig/network',
'HOSTNAME',
'HOSTNAME={0}'.format(hostname))
self._run_command_without_raising(["hostname", hostname], log_error=False)
def set_dhcp_hostname(self, hostname):
ifname = self.get_if_name()
filepath = "/etc/sysconfig/network-scripts/ifcfg-{0}".format(ifname)
fileutil.update_conf_file(filepath,
'DHCP_HOSTNAME',
'DHCP_HOSTNAME={0}'.format(hostname))
def get_dhcp_lease_endpoint(self):
return self.get_endpoint_from_leases_path('/var/lib/dhclient/dhclient-*.leases')
class RedhatOSUtil(Redhat6xOSUtil):
def __init__(self):
super(RedhatOSUtil, self).__init__()
self.service_name = self.get_service_name()
@staticmethod
def get_systemd_unit_file_install_path():
return "/usr/lib/systemd/system"
def set_hostname(self, hostname):
"""
Unlike Redhat 6.x, Redhat 7.x sets the hostname via hostnamectl.
Due to a bug in systemd on CentOS 7.0, if this call fails, fall back
to the hostname command.
"""
hostnamectl_cmd = ['hostnamectl', 'set-hostname', hostname, '--static']
try:
shellutil.run_command(hostnamectl_cmd, log_error=False)
except shellutil.CommandError:
logger.warn("[{0}] failed, attempting fallback".format(' '.join(hostnamectl_cmd)))
DefaultOSUtil.set_hostname(self, hostname)
def get_nm_controlled(self, ifname):
filepath = "/etc/sysconfig/network-scripts/ifcfg-{0}".format(ifname)
nm_controlled_cmd = ['grep', 'NM_CONTROLLED=', filepath]
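# Illustrative ifcfg content this probes (value hypothetical): a line such as
# NM_CONTROLLED="no" (or 'n') makes this method return False; if the key is
# absent, grep exits with code 1 and the default of True is returned.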
try:
result = shellutil.run_command(nm_controlled_cmd, log_error=False).rstrip()
if result and len(result.split('=')) > 1:
# Remove trailing white space and ' or " characters
value = result.split('=')[1].replace("'", '').replace('"', '').rstrip()
if value == "n" or value == "no":
return False
except shellutil.CommandError as e:
# Command might fail because NM_CONTROLLED value is not in interface config file (exit code 1).
# Log warning for any other exit code.
# NM_CONTROLLED=y by default if not specified.
if e.returncode != 1:
logger.warn("[{0}] failed: {1}.\nAgent will continue to publish hostname without NetworkManager restart".format(' '.join(nm_controlled_cmd), e))
except Exception as e:
logger.warn("Unexpected error while retrieving value of NM_CONTROLLED in {0}: {1}.\nAgent will continue to publish hostname without NetworkManager restart".format(filepath, e))
return True
def get_nic_operational_and_general_states(self, ifname):
"""
Checks the contents of /sys/class/net/{ifname}/operstate and the results of 'nmcli -g general.state device show {ifname}' to determine the state of the provided interface.
Raises an exception if the network interface state cannot be determined.
"""
filepath = "/sys/class/net/{0}/operstate".format(ifname)
nic_general_state_cmd = ['nmcli', '-g', 'general.state', 'device', 'show', ifname]
if not os.path.isfile(filepath):
msg = "Unable to determine primary network interface {0} state, because state file does not exist: {1}".format(ifname, filepath)
logger.warn(msg)
raise Exception(msg)
try:
nic_oper_state = fileutil.read_file(filepath).rstrip().lower()
nic_general_state = shellutil.run_command(nic_general_state_cmd, log_error=True).rstrip().lower()
if nic_oper_state != "up":
logger.warn("The primary network interface {0} operational state is '{1}'.".format(ifname, nic_oper_state))
else:
logger.info("The primary network interface {0} operational state is '{1}'.".format(ifname, nic_oper_state))
if nic_general_state != "100 (connected)":
logger.warn("The primary network interface {0} general state is '{1}'.".format(ifname, nic_general_state))
else:
logger.info("The primary network interface {0} general state is '{1}'.".format(ifname, nic_general_state))
return nic_oper_state, nic_general_state
except Exception as e:
msg = "Unexpected error while determining the primary network interface state: {0}".format(e)
logger.warn(msg)
raise Exception(msg)
def check_and_recover_nic_state(self, ifname):
"""
Checks if the provided network interface is in an 'up' state. If the network interface is in a 'down' state,
attempt to recover the interface by restarting the Network Manager service.
Raises an exception if an attempt to bring the interface into an 'up' state fails, or if the state
of the network interface cannot be determined.
"""
nic_operstate, nic_general_state = self.get_nic_operational_and_general_states(ifname)
if nic_operstate == "down" or "disconnected" in nic_general_state:
logger.info("Restarting the Network Manager service to recover network interface {0}".format(ifname))
self.restart_network_manager()
# Interface does not come up immediately after NetworkManager restart. Wait 5 seconds before checking
# network interface state.
time.sleep(5)
nic_operstate, nic_general_state = self.get_nic_operational_and_general_states(ifname)
# It is possible for network interface to be in an unknown or unmanaged state. Log warning if state is not
# down, disconnected, up, or connected
if nic_operstate != "up" or nic_general_state != "100 (connected)":
msg = "Network Manager restart failed to bring network interface {0} into 'up' and 'connected' state".format(ifname)
logger.warn(msg)
raise Exception(msg)
else:
logger.info("Network Manager restart successfully brought the network interface {0} into 'up' and 'connected' state".format(ifname))
elif nic_operstate != "up" or nic_general_state != "100 (connected)":
# We already logged a warning with the network interface state in get_nic_operstate(). Raise an exception
# for the env thread to send to telemetry.
raise Exception("The primary network interface {0} operational state is '{1}' and general state is '{2}'.".format(ifname, nic_operstate, nic_general_state))
def restart_network_manager(self):
shellutil.run("service NetworkManager restart")
def publish_hostname(self, hostname, recover_nic=False):
"""
Restart NetworkManager first before publishing hostname, only if the network interface is not controlled by the
NetworkManager service (as determined by NM_CONTROLLED=n in the interface configuration). If the NetworkManager
service is restarted before the agent publishes the hostname, and NM_CONTROLLED=y, a race condition may happen
between the NetworkManager service and the Guest Agent making changes to the network interface configuration
simultaneously.
Note: check_and_recover_nic_state(ifname) raises an Exception if an attempt to recover the network interface
fails, or if the network interface state cannot be determined. Callers should handle this exception by sending
an event to telemetry.
TODO: Improve failure reporting and add success reporting to telemetry for hostname changes. Right now we are only reporting failures to telemetry by raising an Exception in publish_hostname for the calling thread to handle by reporting the failure to telemetry.
"""
ifname = self.get_if_name()
nm_controlled = self.get_nm_controlled(ifname)
if not nm_controlled:
self.restart_network_manager()
# TODO: Current recover logic is only effective when the NetworkManager manages the network interface. Update the recover logic so it is effective even when NM_CONTROLLED=n
super(RedhatOSUtil, self).publish_hostname(hostname, recover_nic and nm_controlled)
def register_agent_service(self):
return shellutil.run("systemctl enable {0}".format(self.service_name), chk_err=False)
def unregister_agent_service(self):
return shellutil.run("systemctl disable {0}".format(self.service_name), chk_err=False)
def openssl_to_openssh(self, input_file, output_file):
DefaultOSUtil.openssl_to_openssh(self, input_file, output_file)
def get_dhcp_lease_endpoint(self):
# dhclient
endpoint = self.get_endpoint_from_leases_path('/var/lib/dhclient/dhclient-*.lease')
if endpoint is None:
# NetworkManager
endpoint = self.get_endpoint_from_leases_path('/var/lib/NetworkManager/dhclient-*.lease')
return endpoint
class RedhatOSModernUtil(RedhatOSUtil):
def __init__(self): # pylint: disable=W0235
super(RedhatOSModernUtil, self).__init__()
def restart_if(self, ifname, retries=3, wait=5):
"""
Restart an interface by bouncing the link. systemd-networkd observes
this event, and forces a renew of DHCP.
"""
retry_limit = retries + 1
for attempt in range(1, retry_limit):
return_code = shellutil.run("ip link set {0} down && ip link set {0} up".format(ifname))
if return_code == 0:
return
logger.warn("failed to restart {0}: return code {1}".format(ifname, return_code))
if attempt < retry_limit:
logger.info("retrying in {0} seconds".format(wait))
time.sleep(wait)
else:
logger.warn("exceeded restart retries")
def check_and_recover_nic_state(self, ifname):
# TODO: Implement and test a way to recover the network interface for RedhatOSModernUtil
pass
def publish_hostname(self, hostname, recover_nic=False):
# RedhatOSUtil was updated to conditionally run NetworkManager restart in response to a race condition between
# NetworkManager restart and the agent restarting the network interface during publish_hostname. Keeping the
# NetworkManager restart in RedhatOSModernUtil because the issue was not reproduced on these versions.
shellutil.run("service NetworkManager restart")
DefaultOSUtil.publish_hostname(self, hostname)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/suse.py 0000664 0000000 0000000 00000015570 14626177470 0025125 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import time
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil # pylint: disable=W0611
from azurelinuxagent.common.exception import OSUtilError # pylint: disable=W0611
from azurelinuxagent.common.future import ustr # pylint: disable=W0611
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class SUSE11OSUtil(DefaultOSUtil):
def __init__(self):
super(SUSE11OSUtil, self).__init__()
self.jit_enabled = True
self.dhclient_name = 'dhcpcd'
def set_hostname(self, hostname):
fileutil.write_file('/etc/HOSTNAME', hostname)
self._run_command_without_raising(["hostname", hostname], log_error=False)
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", self.dhclient_name])
def is_dhcp_enabled(self):
return True
def stop_dhcp_service(self):
self._run_command_without_raising(["/sbin/service", self.dhclient_name, "stop"], log_error=False)
def start_dhcp_service(self):
self._run_command_without_raising(["/sbin/service", self.dhclient_name, "start"], log_error=False)
def start_network(self):
self._run_command_without_raising(["/sbin/service", "network", "start"], log_error=False)
def restart_ssh_service(self):
self._run_command_without_raising(["/sbin/service", "sshd", "restart"], log_error=False)
def stop_agent_service(self):
self._run_command_without_raising(["/sbin/service", self.service_name, "stop"], log_error=False)
def start_agent_service(self):
self._run_command_without_raising(["/sbin/service", self.service_name, "start"], log_error=False)
def register_agent_service(self):
self._run_command_without_raising(["/sbin/insserv", self.service_name], log_error=False)
def unregister_agent_service(self):
self._run_command_without_raising(["/sbin/insserv", "-r", self.service_name], log_error=False)
class SUSEOSUtil(SUSE11OSUtil):
def __init__(self):
super(SUSEOSUtil, self).__init__()
self.dhclient_name = 'wickedd-dhcp4'
def publish_hostname(self, hostname, recover_nic=False):
self.set_dhcp_hostname(hostname)
self.set_hostname_record(hostname)
ifname = self.get_if_name()
# To push the hostname to the DHCP server we do not need to
# bring down the interface; just let ifup do whatever is
# necessary
self.ifup(ifname)
def ifup(self, ifname, retries=3, wait=5):
logger.info('Interface {0} bounce with ifup'.format(ifname))
retry_limit=retries+1
for attempt in range(1, retry_limit):
try:
shellutil.run_command(['ifup', ifname], log_error=True)
except Exception:
if attempt < retry_limit:
logger.info("retrying in {0} seconds".format(wait))
time.sleep(wait)
else:
logger.warn("exceeded restart retries")
@staticmethod
def get_systemd_unit_file_install_path():
return "/usr/lib/systemd/system"
def set_hostname(self, hostname):
self._run_command_without_raising(
["hostnamectl", "set-hostname", hostname], log_error=False
)
def set_dhcp_hostname(self, hostname):
dhcp_config_file_path = '/etc/sysconfig/network/dhcp'
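# Illustrative /etc/sysconfig/network/dhcp settings handled below (the
# hostname 'myvm' is hypothetical): DHCLIENT_HOSTNAME_OPTION="AUTO" is left
# alone, while any other value is rewritten to
# DHCLIENT_HOSTNAME_OPTION="myvm".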
hostname_send_setting = fileutil.get_line_startingwith(
'DHCLIENT_HOSTNAME_OPTION', dhcp_config_file_path
)
if hostname_send_setting:
value = hostname_send_setting.split('=')[-1]
# wicked's source accepts values with double quotes, single quotes, and no quotes at all.
if value in ('"AUTO"', "'AUTO'", 'AUTO') or value == '"{0}"'.format(hostname):
# Return if auto send host-name is configured or the current
# hostname is already set up to be sent
return
else:
# Do not use update_conf_file as it moves the setting to the
# end of the file separating it from the contextual comment
new_conf = []
dhcp_conf = fileutil.read_file(
dhcp_config_file_path).split('\n')
for entry in dhcp_conf:
if entry.startswith('DHCLIENT_HOSTNAME_OPTION'):
new_conf.append(
'DHCLIENT_HOSTNAME_OPTION="{0}"'. format(hostname)
)
continue
new_conf.append(entry)
fileutil.write_file(dhcp_config_file_path, '\n'.join(new_conf))
else:
fileutil.append_file(
dhcp_config_file_path,
'DHCLIENT_HOSTNAME_OPTION="{0}"'. format(hostname)
)
def stop_dhcp_service(self):
self._run_command_without_raising(["systemctl", "stop", "{}.service".format(self.dhclient_name)],
log_error=False)
def start_dhcp_service(self):
self._run_command_without_raising(["systemctl", "start", "{}.service".format(self.dhclient_name)],
log_error=False)
def start_network(self):
self._run_command_without_raising(["systemctl", "start", "network.service"], log_error=False)
def restart_ssh_service(self):
self._run_command_without_raising(["systemctl", "restart", "sshd.service"], log_error=False)
def stop_agent_service(self):
self._run_command_without_raising(["systemctl", "stop", "{}.service".format(self.service_name)],
log_error=False)
def start_agent_service(self):
self._run_command_without_raising(["systemctl", "start", "{}.service".format(self.service_name)],
log_error=False)
def register_agent_service(self):
self._run_command_without_raising(["systemctl", "enable", "{}.service".format(self.service_name)],
log_error=False)
def unregister_agent_service(self):
self._run_command_without_raising(["systemctl", "disable", "{}.service".format(self.service_name)],
log_error=False)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/systemd.py 0000664 0000000 0000000 00000004772 14626177470 0025640 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import re
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import shellutil
def _get_os_util():
if _get_os_util.value is None:
_get_os_util.value = get_osutil()
return _get_os_util.value
_get_os_util.value = None
def is_systemd():
"""
Determine if systemd is managing system services; the implementation follows the same strategy as, for example,
sd_booted() in libsystemd, or /usr/sbin/service
"""
return os.path.exists("/run/systemd/system/")
def get_version():
# the output is similar to
# $ systemctl --version
# systemd 245 (245.4-4ubuntu3)
# +PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP etc
#
return shellutil.run_command(['systemctl', '--version'])
def get_unit_file_install_path():
"""
e.g. /lib/systemd/system
"""
return _get_os_util().get_systemd_unit_file_install_path()
def get_agent_unit_name():
"""
e.g. walinuxagent.service
"""
return _get_os_util().get_service_name() + ".service"
def get_agent_unit_file():
"""
e.g. /lib/systemd/system/walinuxagent.service
"""
return os.path.join(get_unit_file_install_path(), get_agent_unit_name())
def get_agent_drop_in_path():
"""
e.g. /lib/systemd/system/walinuxagent.service.d
"""
return os.path.join(get_unit_file_install_path(), "{0}.d".format(get_agent_unit_name()))
def get_unit_property(unit_name, property_name):
output = shellutil.run_command(["systemctl", "show", unit_name, "--property", property_name])
# Output is similar to
# # systemctl show walinuxagent.service --property CPUQuotaPerSecUSec
# CPUQuotaPerSecUSec=50ms
match = re.match("[^=]+=(?P<value>.+)", output)
if match is None:
raise ValueError("Can't find property {0} of {1}".format(property_name, unit_name))
return match.group('value')
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/osutil/ubuntu.py 0000664 0000000 0000000 00000014370 14626177470 0025465 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import glob
import textwrap
import time
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.osutil.default import DefaultOSUtil
class Ubuntu14OSUtil(DefaultOSUtil):
def __init__(self):
super(Ubuntu14OSUtil, self).__init__()
self.jit_enabled = True
self.service_name = self.get_service_name()
@staticmethod
def get_service_name():
return "walinuxagent"
def start_network(self):
return shellutil.run("service networking start", chk_err=False)
def stop_agent_service(self):
try:
shellutil.run_command(["service", self.service_name, "stop"])
except shellutil.CommandError as cmd_err:
return cmd_err.returncode
return 0
def start_agent_service(self):
try:
shellutil.run_command(["service", self.service_name, "start"])
except shellutil.CommandError as cmd_err:
return cmd_err.returncode
return 0
def remove_rules_files(self, rules_files=""):
pass
def restore_rules_files(self, rules_files=""):
pass
def get_dhcp_lease_endpoint(self):
return self.get_endpoint_from_leases_path('/var/lib/dhcp/dhclient.*.leases')
class Ubuntu12OSUtil(Ubuntu14OSUtil):
def __init__(self): # pylint: disable=W0235
super(Ubuntu12OSUtil, self).__init__()
# Override
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "dhclient3"])
class Ubuntu16OSUtil(Ubuntu14OSUtil):
"""
Ubuntu 16.04, 16.10, and 17.04.
"""
def __init__(self):
super(Ubuntu16OSUtil, self).__init__()
self.service_name = self.get_service_name()
def register_agent_service(self):
return shellutil.run("systemctl unmask {0}".format(self.service_name), chk_err=False)
def unregister_agent_service(self):
return shellutil.run("systemctl mask {0}".format(self.service_name), chk_err=False)
class Ubuntu18OSUtil(Ubuntu16OSUtil):
"""
Ubuntu >=18.04 and <=24.04
"""
def __init__(self):
super(Ubuntu18OSUtil, self).__init__()
self.service_name = self.get_service_name()
def restart_if(self, ifname, retries=3, wait=5):
"""
Restart systemd-networkd
"""
retry_limit=retries+1
for attempt in range(1, retry_limit):
try:
shellutil.run_command(["systemctl", "restart", "systemd-networkd"])
except shellutil.CommandError as cmd_err:
logger.warn("failed to restart systemd-networkd: return code {1}".format(cmd_err.returncode))
if attempt < retry_limit:
logger.info("retrying in {0} seconds".format(wait))
time.sleep(wait)
else:
logger.warn("exceeded restart retries")
def get_dhcp_pid(self):
return self._get_dhcp_pid(["pidof", "systemd-networkd"])
def start_network(self):
return shellutil.run("systemctl start systemd-networkd", chk_err=False)
def stop_network(self):
return shellutil.run("systemctl stop systemd-networkd", chk_err=False)
def start_dhcp_service(self):
return self.start_network()
def stop_dhcp_service(self):
return self.stop_network()
def start_agent_service(self):
return shellutil.run("systemctl start {0}".format(self.service_name), chk_err=False)
def stop_agent_service(self):
return shellutil.run("systemctl stop {0}".format(self.service_name), chk_err=False)
def get_dhcp_lease_endpoint(self):
pathglob = "/run/systemd/netif/leases/*"
logger.info("looking for leases in path [{0}]".format(pathglob))
endpoint = None
for lease_file in glob.glob(pathglob):
try:
with open(lease_file) as f:
lease = f.read()
for line in lease.splitlines():
if line.startswith("OPTION_245"):
option_245 = line.split("=")[1]
options = [int(i, 16) for i in textwrap.wrap(option_245, 2)]
endpoint = "{0}.{1}.{2}.{3}".format(*options)
logger.info("found endpoint [{0}]".format(endpoint))
except Exception as e:
logger.info(
"Failed to parse {0}: {1}".format(lease_file, str(e))
)
if endpoint is not None:
logger.info("cached endpoint found [{0}]".format(endpoint))
else:
logger.info("cached endpoint not found")
return endpoint
class UbuntuOSUtil(Ubuntu16OSUtil):
def __init__(self): # pylint: disable=W0235
super(UbuntuOSUtil, self).__init__()
def restart_if(self, ifname, retries=3, wait=5):
"""
Restart an interface by bouncing the link. systemd-networkd observes
this event, and forces a renew of DHCP.
"""
retry_limit = retries+1
for attempt in range(1, retry_limit):
return_code = shellutil.run("ip link set {0} down && ip link set {0} up".format(ifname))
if return_code == 0:
return
logger.warn("failed to restart {0}: return code {1}".format(ifname, return_code))
if attempt < retry_limit:
logger.info("retrying in {0} seconds".format(wait))
time.sleep(wait)
else:
logger.warn("exceeded restart retries")
class UbuntuSnappyOSUtil(Ubuntu14OSUtil):
def __init__(self):
super(UbuntuSnappyOSUtil, self).__init__()
self.conf_file_path = '/apps/walinuxagent/current/waagent.conf'
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/ 0000775 0000000 0000000 00000000000 14626177470 0024106 5 ustar 00root root 0000000 0000000 Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/__init__.py 0000664 0000000 0000000 00000001165 14626177470 0026222 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/extensions_goal_state.py 0000664 0000000 0000000 00000015757 14626177470 0031100 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import datetime
from azurelinuxagent.common import logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.exception import AgentError
from azurelinuxagent.common.utils import textutil
class GoalStateChannel(object):
WireServer = "WireServer"
HostGAPlugin = "HostGAPlugin"
Empty = "Empty"
class GoalStateSource(object):
Fabric = "Fabric"
FastTrack = "FastTrack"
Empty = "Empty"
class VmSettingsParseError(AgentError):
"""
Error raised when the VmSettings are malformed
"""
def __init__(self, message, etag, vm_settings_text, inner=None):
super(VmSettingsParseError, self).__init__(message, inner)
self.etag = etag
self.vm_settings_text = vm_settings_text
class ExtensionsGoalState(object):
"""
ExtensionsGoalState represents the extensions information in the goal state; that information can originate from
ExtensionsConfig when the goal state is retrieved from the WireServer, or from vmSettings when it is retrieved from
the HostGAPlugin.
NOTE: This is an abstract class. The corresponding concrete classes can be instantiated using the ExtensionsGoalStateFactory.
"""
def __init__(self):
self._is_outdated = False
@property
def id(self):
"""
Returns a string that includes the incarnation number if the ExtensionsGoalState was created from ExtensionsConfig, or the etag if it
was created from vmSettings.
"""
raise NotImplementedError()
@property
def is_outdated(self):
"""
A goal state can be outdated if, for example, the VM Agent is using Fast Track and support for it stops (e.g. the VM is migrated
to a node with an older version of the HostGAPlugin) and now the Agent is fetching goal states via the WireServer.
"""
return self._is_outdated
@is_outdated.setter
def is_outdated(self, value):
self._is_outdated = value
@property
def svd_sequence_number(self):
raise NotImplementedError()
@property
def activity_id(self):
raise NotImplementedError()
@property
def correlation_id(self):
raise NotImplementedError()
@property
def created_on_timestamp(self):
raise NotImplementedError()
@property
def channel(self):
"""
Whether the goal state was retrieved from the WireServer or the HostGAPlugin
"""
raise NotImplementedError()
@property
def source(self):
"""
Whether the goal state originated from Fabric or Fast Track
"""
raise NotImplementedError()
@property
def status_upload_blob(self):
raise NotImplementedError()
@property
def status_upload_blob_type(self):
raise NotImplementedError()
def _set_status_upload_blob_type(self, value):
raise NotImplementedError()
@property
def required_features(self):
raise NotImplementedError()
@property
def on_hold(self):
raise NotImplementedError()
@property
def agent_families(self):
raise NotImplementedError()
@property
def extensions(self):
raise NotImplementedError()
def get_redacted_text(self):
"""
Returns the raw text (either the ExtensionsConfig or the vmSettings) with any confidential data removed, or an empty string for empty goal states.
"""
raise NotImplementedError()
def _do_common_validations(self):
"""
Does validations common to vmSettings and ExtensionsConfig
"""
if self.status_upload_blob_type not in ["BlockBlob", "PageBlob"]:
logger.info("Status Blob type '{0}' is not valid, assuming BlockBlob", self.status_upload_blob)
self._set_status_upload_blob_type("BlockBlob")
@staticmethod
def _ticks_to_utc_timestamp(ticks_string):
"""
Takes 'ticks', a string indicating the number of ticks since midnight 0001-01-01 00:00:00, and
returns a UTC timestamp (every tick is 1/10000000 of a second).
"""
minimum = datetime.datetime(1900, 1, 1, 0, 0) # min value accepted by datetime.strftime()
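# Illustrative behavior: a ticks_string of '0' yields datetime.min
# (0001-01-01), which the max() below clamps up to the 1900-01-01 minimum
# before formatting; None or '' take the same minimum directly.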
as_date_time = minimum
if ticks_string not in (None, ""):
try:
as_date_time = datetime.datetime.min + datetime.timedelta(seconds=float(ticks_string) / 10 ** 7)
except Exception as exception:
logger.verbose("Can't parse ticks: {0}", textutil.format_exception(exception))
as_date_time = max(as_date_time, minimum)
return as_date_time.strftime(logger.Logger.LogTimeFormatInUTC)
@staticmethod
def _string_to_id(id_string):
"""
Takes 'id', a string indicating an ID, and returns a null GUID if the string is None or empty; otherwise
return 'id' unchanged
"""
if id_string in (None, ""):
return AgentGlobals.GUID_ZERO
return id_string
class EmptyExtensionsGoalState(ExtensionsGoalState):
def __init__(self, incarnation):
super(EmptyExtensionsGoalState, self).__init__()
self._id = "incarnation_{0}".format(incarnation)
self._incarnation = incarnation
@property
def id(self):
return self._id
@property
def incarnation(self):
return self._incarnation
@property
def svd_sequence_number(self):
return self._incarnation
@property
def activity_id(self):
return AgentGlobals.GUID_ZERO
@property
def correlation_id(self):
return AgentGlobals.GUID_ZERO
@property
def created_on_timestamp(self):
return datetime.datetime.min
@property
def channel(self):
return GoalStateChannel.Empty
@property
def source(self):
return GoalStateSource.Empty
@property
def status_upload_blob(self):
return None
@property
def status_upload_blob_type(self):
return None
def _set_status_upload_blob_type(self, value):
raise TypeError("EmptyExtensionsGoalState is immutable; cannot change the value of the status upload blob")
@property
def required_features(self):
return []
@property
def on_hold(self):
return False
@property
def agent_families(self):
return []
@property
def extensions(self):
return []
def get_redacted_text(self):
return ''
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/extensions_goal_state_factory.py 0000664 0000000 0000000 00000002734 14626177470 0032616 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
from azurelinuxagent.common.protocol.extensions_goal_state import EmptyExtensionsGoalState
from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig
from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings
class ExtensionsGoalStateFactory(object):
@staticmethod
def create_empty(incarnation):
return EmptyExtensionsGoalState(incarnation)
@staticmethod
def create_from_extensions_config(incarnation, xml_text, wire_client):
return ExtensionsGoalStateFromExtensionsConfig(incarnation, xml_text, wire_client)
@staticmethod
def create_from_vm_settings(etag, json_text, correlation_id):
return ExtensionsGoalStateFromVmSettings(etag, json_text, correlation_id)
extensions_goal_state_from_extensions_config.py 0000664 0000000 0000000 00000070704 14626177470 0035641 0 ustar 00root root 0000000 0000000 Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import json
from collections import defaultdict
from azurelinuxagent.common import logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import ExtensionsConfigError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, GoalStateSource
from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentFamily, ExtensionState, InVMGoalStateMetaData
from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, format_exception, \
is_str_none_or_whitespace, is_str_empty
class ExtensionsGoalStateFromExtensionsConfig(ExtensionsGoalState):
def __init__(self, incarnation, xml_text, wire_client):
super(ExtensionsGoalStateFromExtensionsConfig, self).__init__()
self._id = "incarnation_{0}".format(incarnation)
self._is_outdated = False
self._incarnation = incarnation
self._text = xml_text
self._status_upload_blob = None
self._status_upload_blob_type = None
self._required_features = []
self._on_hold = False
self._activity_id = None
self._correlation_id = None
self._created_on_timestamp = None
self._agent_families = []
self._extensions = []
try:
self._parse_extensions_config(xml_text, wire_client)
self._do_common_validations()
except Exception as e:
raise ExtensionsConfigError("Error parsing ExtensionsConfig (incarnation: {0}): {1}\n{2}".format(incarnation, format_exception(e), self.get_redacted_text()))
def _parse_extensions_config(self, xml_text, wire_client):
xml_doc = parse_doc(xml_text)
ga_families_list = find(xml_doc, "GAFamilies")
ga_families = findall(ga_families_list, "GAFamily")
for ga_family in ga_families:
name = findtext(ga_family, "Name")
version = findtext(ga_family, "Version")
is_version_from_rsm = findtext(ga_family, "IsVersionFromRSM")
is_vm_enabled_for_rsm_upgrades = findtext(ga_family, "IsVMEnabledForRSMUpgrades")
uris_list = find(ga_family, "Uris")
uris = findall(uris_list, "Uri")
family = VMAgentFamily(name)
family.version = version
if is_version_from_rsm is not None: # checking None because converting string to lowercase
family.is_version_from_rsm = is_version_from_rsm.lower() == "true"
if is_vm_enabled_for_rsm_upgrades is not None: # checking None because converting string to lowercase
family.is_vm_enabled_for_rsm_upgrades = is_vm_enabled_for_rsm_upgrades.lower() == "true"
for uri in uris:
family.uris.append(gettext(uri))
self._agent_families.append(family)
self.__parse_plugins_and_settings_and_populate_ext_handlers(xml_doc)
required_features_list = find(xml_doc, "RequiredFeatures")
if required_features_list is not None:
self._parse_required_features(required_features_list)
self._status_upload_blob = findtext(xml_doc, "StatusUploadBlob")
status_upload_node = find(xml_doc, "StatusUploadBlob")
self._status_upload_blob_type = getattrib(status_upload_node, "statusBlobType")
logger.verbose("Extension config shows status blob type as [{0}]", self._status_upload_blob_type)
self._on_hold = ExtensionsGoalStateFromExtensionsConfig._fetch_extensions_on_hold(xml_doc, wire_client)
in_vm_gs_metadata = InVMGoalStateMetaData(find(xml_doc, "InVMGoalStateMetaData"))
self._activity_id = self._string_to_id(in_vm_gs_metadata.activity_id)
self._correlation_id = self._string_to_id(in_vm_gs_metadata.correlation_id)
self._created_on_timestamp = self._ticks_to_utc_timestamp(in_vm_gs_metadata.created_on_ticks)
@staticmethod
def _fetch_extensions_on_hold(xml_doc, wire_client):
def log_info(message):
logger.info(message)
add_event(op=WALAEventOperation.ArtifactsProfileBlob, message=message, is_success=True, log_event=False)
def log_warning(message):
logger.warn(message)
add_event(op=WALAEventOperation.ArtifactsProfileBlob, message=message, is_success=False, log_event=False)
artifacts_profile_blob = findtext(xml_doc, "InVMArtifactsProfileBlob")
if is_str_none_or_whitespace(artifacts_profile_blob):
log_info("ExtensionsConfig does not include a InVMArtifactsProfileBlob; will assume the VM is not on hold")
return False
try:
profile = wire_client.fetch_artifacts_profile_blob(artifacts_profile_blob)
except Exception as error:
log_warning("Can't download the artifacts profile blob; will assume the VM is not on hold. {0}".format(ustr(error)))
return False
if is_str_empty(profile):
log_info("The artifacts profile blob is empty; will assume the VM is not on hold.")
return False
try:
artifacts_profile = _InVMArtifactsProfile(profile)
except Exception as exception:
log_warning("Can't parse the artifacts profile blob; will assume the VM is not on hold. Error: {0}".format(ustr(exception)))
return False
return artifacts_profile.get_on_hold()
@property
def id(self):
return self._id
@property
def incarnation(self):
return self._incarnation
@property
def svd_sequence_number(self):
return self._incarnation
@property
def activity_id(self):
return self._activity_id
@property
def correlation_id(self):
return self._correlation_id
@property
def created_on_timestamp(self):
return self._created_on_timestamp
@property
def channel(self):
return GoalStateChannel.WireServer
@property
def source(self):
return GoalStateSource.Fabric
@property
def status_upload_blob(self):
return self._status_upload_blob
@property
def status_upload_blob_type(self):
return self._status_upload_blob_type
def _set_status_upload_blob_type(self, value):
self._status_upload_blob_type = value
@property
def required_features(self):
return self._required_features
@property
def on_hold(self):
return self._on_hold
@property
def agent_families(self):
return self._agent_families
@property
def extensions(self):
return self._extensions
def get_redacted_text(self):
text = self._text
for ext_handler in self._extensions:
for extension in ext_handler.settings:
if extension.protectedSettings is not None:
text = text.replace(extension.protectedSettings, "*** REDACTED ***")
return text
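# Note: redaction here is a literal string replacement: each extension's
# protectedSettings payload is substituted with "*** REDACTED ***" wherever it
# occurs in the XML text (compare the regex-based approach in
# ExtensionsGoalStateFromVmSettings.get_redacted_text).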
def _parse_required_features(self, required_features_list):
for required_feature in findall(required_features_list, "RequiredFeature"):
feature_name = findtext(required_feature, "Name")
# per the documentation, RequiredFeatures also have a "Value" attribute but currently it is not being populated
self._required_features.append(feature_name)
def __parse_plugins_and_settings_and_populate_ext_handlers(self, xml_doc):
"""
Sample ExtensionConfig Plugin and PluginSettings:
{
"runtimeSettings": [
{
"handlerSettings": {
"publicSettings": {"01_add_extensions_with_dependency":"ff2a3da6-8e12-4ab6-a4ca-4e3a473ab385"}
}
}
]
}
{
"runtimeSettings": [
{
"handlerSettings": {
"publicSettings": {"01_add_extensions_with_dependency":"2e837740-cf7e-4528-b3a4-241002618f05"}
}
}
]
}
"""
plugins_list = find(xml_doc, "Plugins")
plugins = findall(plugins_list, "Plugin")
plugin_settings_list = find(xml_doc, "PluginSettings")
plugin_settings = findall(plugin_settings_list, "Plugin")
for plugin in plugins:
extension = Extension()
try:
ExtensionsGoalStateFromExtensionsConfig._parse_plugin(extension, plugin)
ExtensionsGoalStateFromExtensionsConfig._parse_plugin_settings(extension, plugin_settings)
except ExtensionsConfigError as error:
extension.invalid_setting_reason = ustr(error)
self._extensions.append(extension)
@staticmethod
def _parse_plugin(extension, plugin):
"""
Sample config:
https://rdfecurrentuswestcache3.blob.core.test-cint.azure-test.net/0e53c53ef0be4178bacb0a1fecf12a74/Microsoft.Azure.Extensions_CustomScript_usstagesc_manifest.xml
https://rdfecurrentuswestcache4.blob.core.test-cint.azure-test.net/0e53c53ef0be4178bacb0a1fecf12a74/Microsoft.Azure.Extensions_CustomScript_usstagesc_manifest.xml
Note that the `additionalLocations` subnode is populated with links
generated by PIR for resiliency. In regions with this feature enabled,
CRP will provide any extra links in the format above. If no extra links
are provided, the subnode will not exist.
"""
def _log_error_if_none(attr_name, value):
# Plugin Name and Version are essential fields; without them we won't be able to even report back to CRP
# about that handler. For those cases we need to fail the GoalState completely, but currently we don't support
# reporting status at a GoalState level (we only report at a handler level).
# Once that functionality is added to the GA, we would raise here rather than just log an error.
if value in (None, ""):
add_event(op=WALAEventOperation.InvalidExtensionConfig,
message="{0} is None for ExtensionConfig, logging error".format(attr_name),
log_event=True, is_success=False)
return value
extension.name = _log_error_if_none("Extensions.Plugins.Plugin.name", getattrib(plugin, "name"))
extension.version = _log_error_if_none("Extensions.Plugins.Plugin.version",
getattrib(plugin, "version"))
extension.state = getattrib(plugin, "state")
if extension.state in (None, ""):
raise ExtensionsConfigError("Received empty Extensions.Plugins.Plugin.state, failing Handler")
def getattrib_wrapped_in_list(node, attr_name):
attr = getattrib(node, attr_name)
return [attr] if attr not in (None, "") else []
location = getattrib_wrapped_in_list(plugin, "location")
failover_location = getattrib_wrapped_in_list(plugin, "failoverlocation")
locations = location + failover_location
additional_location_node = find(plugin, "additionalLocations")
if additional_location_node is not None:
nodes_list = findall(additional_location_node, "additionalLocation")
locations += [gettext(node) for node in nodes_list]
for uri in locations:
extension.manifest_uris.append(uri)
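# Sample (illustrative only, not from the source) Plugin element limited to the
# attributes and subnodes read above:
#
#   <Plugin name="Microsoft.Azure.Extensions.CustomScript" version="2.1.10" state="enabled"
#           location="https://.../manifest.xml" failoverlocation="https://.../manifest.xml">
#     <additionalLocations>
#       <additionalLocation>https://.../manifest.xml</additionalLocation>
#     </additionalLocations>
#   </Plugin>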
@staticmethod
def _parse_plugin_settings(extension, plugin_settings):
"""
Sample config:
{
"runtimeSettings": [
{
"handlerSettings": {
"publicSettings": {"01_add_extensions_with_dependency":"ff2a3da6-8e12-4ab6-a4ca-4e3a473ab385"}
}
}
]
}
{
"runtimeSettings": [
{
"handlerSettings": {
"publicSettings": {"source":{"script":"Write-Host First: Hello World TestTry2!"},"parameters":[{"name":"extensionName","value":"firstRunCommand"}],"timeoutInSeconds":120}
}
}
]
}
"""
if plugin_settings is None:
return
extension_name = extension.name
version = extension.version
def to_lower(str_to_change): return str_to_change.lower() if str_to_change is not None else None
extension_plugin_settings = [x for x in plugin_settings if to_lower(getattrib(x, "name")) == to_lower(extension_name)]
if not extension_plugin_settings:
return
settings = [x for x in extension_plugin_settings if getattrib(x, "version") == version]
if len(settings) != len(extension_plugin_settings):
msg = "Extension PluginSettings Version Mismatch! Expected PluginSettings version: {0} for Extension: {1} but found versions: ({2})".format(
version, extension_name, ', '.join(set([getattrib(x, "version") for x in extension_plugin_settings])))
add_event(op=WALAEventOperation.PluginSettingsVersionMismatch, message=msg, log_event=True,
is_success=False)
raise ExtensionsConfigError(msg)
if len(settings) > 1:
msg = "Multiple plugin settings found for the same extension: {0} and version: {1} (Expected: 1; Available: {2})".format(
extension_name, version, len(settings))
raise ExtensionsConfigError(msg)
plugin_settings_node = settings[0]
runtime_settings_nodes = findall(plugin_settings_node, "RuntimeSettings")
extension_runtime_settings_nodes = findall(plugin_settings_node, "ExtensionRuntimeSettings")
if any(runtime_settings_nodes) and any(extension_runtime_settings_nodes):
# There can only be a single RuntimeSettings node or multiple ExtensionRuntimeSettings nodes per Plugin
msg = "Both RuntimeSettings and ExtensionRuntimeSettings found for the same extension: {0} and version: {1}".format(
extension_name, version)
raise ExtensionsConfigError(msg)
if runtime_settings_nodes:
if len(runtime_settings_nodes) > 1:
msg = "Multiple RuntimeSettings found for the same extension: {0} and version: {1} (Expected: 1; Available: {2})".format(
extension_name, version, len(runtime_settings_nodes))
raise ExtensionsConfigError(msg)
# Only Runtime settings available, parse that
ExtensionsGoalStateFromExtensionsConfig.__parse_runtime_settings(plugin_settings_node, runtime_settings_nodes[0], extension_name,
extension)
elif extension_runtime_settings_nodes:
# Parse the ExtensionRuntime settings for the given extension
ExtensionsGoalStateFromExtensionsConfig.__parse_extension_runtime_settings(plugin_settings_node, extension_runtime_settings_nodes,
extension)
@staticmethod
def __get_dependency_level_from_node(depends_on_node, name):
depends_on_level = 0
if depends_on_node is not None:
try:
depends_on_level = int(getattrib(depends_on_node, "dependencyLevel"))
except (ValueError, TypeError):
logger.warn("Could not parse dependencyLevel for handler {0}. Setting it to 0".format(name))
depends_on_level = 0
return depends_on_level
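# Sample (illustrative only) DependsOn nodes carrying the "dependencyLevel" attribute
# read above; for multi-config extensions the node also carries a "name" attribute
# (see __parse_extension_runtime_settings below):
#
#   <DependsOn dependencyLevel="1" />
#   <DependsOn dependencyLevel="2" name="MCExt1" />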
@staticmethod
def __parse_runtime_settings(plugin_settings_node, runtime_settings_node, extension_name, extension):
"""
Sample Plugin in PluginSettings containing DependsOn and RuntimeSettings (single settings per extension) -
{
"runtimeSettings": [
{
"handlerSettings": {
"protectedSettingsCertThumbprint": "",
"protectedSettings": "",
"publicSettings": {"UserName":"test1234"}
}
}
]
}
"""
depends_on_nodes = findall(plugin_settings_node, "DependsOn")
if len(depends_on_nodes) > 1:
msg = "Extension Handler can only have a single dependsOn node for Single config extensions. Found: {0}".format(
len(depends_on_nodes))
raise ExtensionsConfigError(msg)
depends_on_node = depends_on_nodes[0] if depends_on_nodes else None
depends_on_level = ExtensionsGoalStateFromExtensionsConfig.__get_dependency_level_from_node(depends_on_node, extension_name)
ExtensionsGoalStateFromExtensionsConfig.__parse_and_add_extension_settings(runtime_settings_node, extension_name, extension,
depends_on_level)
@staticmethod
def __parse_extension_runtime_settings(plugin_settings_node, extension_runtime_settings_nodes, extension):
"""
Sample PluginSettings containing DependsOn and ExtensionRuntimeSettings -
{
"runtimeSettings": [
{
"handlerSettings": {
"publicSettings": {"source":{"script":"Write-Host First: Hello World 1234!"}}
}
}
]
}
{
"runtimeSettings": [
{
"handlerSettings": {
"publicSettings": {"source":{"script":"Write-Host First: Hello World 1234!"}}
}
}
]
}
{
"runtimeSettings": [
{
"handlerSettings": {
"publicSettings": {"source":{"script":"Write-Host Third: Hello World 3!"}}
}
}
]
}
"""
# Parse and cache the Dependencies for each extension first
dependency_levels = defaultdict(int)
for depends_on_node in findall(plugin_settings_node, "DependsOn"):
extension_name = getattrib(depends_on_node, "name")
if extension_name in (None, ""):
raise ExtensionsConfigError("No Name not specified for DependsOn object in ExtensionRuntimeSettings for MultiConfig!")
dependency_level = ExtensionsGoalStateFromExtensionsConfig.__get_dependency_level_from_node(depends_on_node, extension_name)
dependency_levels[extension_name] = dependency_level
extension.supports_multi_config = True
for extension_runtime_setting_node in extension_runtime_settings_nodes:
# Name and State will only be set for ExtensionRuntimeSettings for Multi-Config
extension_name = getattrib(extension_runtime_setting_node, "name")
if extension_name in (None, ""):
raise ExtensionsConfigError("Extension Name not specified for ExtensionRuntimeSettings for MultiConfig!")
# State can either be `ExtensionState.Enabled` (default) or `ExtensionState.Disabled`
state = getattrib(extension_runtime_setting_node, "state")
state = ustr(state.lower()) if state not in (None, "") else ExtensionState.Enabled
ExtensionsGoalStateFromExtensionsConfig.__parse_and_add_extension_settings(extension_runtime_setting_node, extension_name,
extension, dependency_levels[extension_name],
state=state)
@staticmethod
def __parse_and_add_extension_settings(settings_node, name, extension, depends_on_level, state=ExtensionState.Enabled):
seq_no = getattrib(settings_node, "seqNo")
if seq_no in (None, ""):
raise ExtensionsConfigError("SeqNo not specified for the Extension: {0}".format(name))
try:
runtime_settings = json.loads(gettext(settings_node))
except ValueError as error:
logger.error("Invalid extension settings: {0}", ustr(error))
# In case of invalid or missing settings, add the name and seqNo of the extension and treat it as an extension
# with no settings, since we were able to parse that data successfully. Without this, we won't report
# anything for that sequence number and CRP would eventually have to time out rather than fail fast.
extension.settings.append(
ExtensionSettings(name=name, sequenceNumber=seq_no, state=state, dependencyLevel=depends_on_level))
return
for plugin_settings_list in runtime_settings["runtimeSettings"]:
handler_settings = plugin_settings_list["handlerSettings"]
extension_settings = ExtensionSettings()
# There is no "extension name" for single Handler Settings. Use HandlerName for those
extension_settings.name = name
extension_settings.state = state
extension_settings.sequenceNumber = int(seq_no)
extension_settings.publicSettings = handler_settings.get("publicSettings")
extension_settings.protectedSettings = handler_settings.get("protectedSettings")
extension_settings.dependencyLevel = depends_on_level
thumbprint = handler_settings.get("protectedSettingsCertThumbprint")
extension_settings.certificateThumbprint = thumbprint
extension.settings.append(extension_settings)
# Do not extend this class
class _InVMArtifactsProfile(object):
"""
Deserialized JSON string of InVMArtifactsProfile.
It is expected to contain the following fields:
* inVMArtifactsProfileBlobSeqNo
* profileId (optional)
* onHold (optional)
* certificateThumbprint (optional)
* encryptedHealthChecks (optional)
* encryptedApplicationProfile (optional)
"""
def __init__(self, artifacts_profile_json):
self._on_hold = False
artifacts_profile = parse_json(artifacts_profile_json)
on_hold = artifacts_profile.get('onHold')
if on_hold is not None:
# accept both bool and str values
on_hold_normalized = str(on_hold).lower()
if on_hold_normalized == "true":
self._on_hold = True
elif on_hold_normalized == "false":
self._on_hold = False
else:
raise Exception("Invalid value for onHold: {0}".format(on_hold))
def get_on_hold(self):
return self._on_hold
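# Sample (illustrative only) artifacts profile payload; per __init__ above, "onHold"
# is accepted as either a boolean or a string:
#
#   { "onHold": true }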
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py0000664 0000000 0000000 00000065353 14626177470 0034542 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import datetime
import json
import re
import sys
from azurelinuxagent.common import logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, VmSettingsParseError
from azurelinuxagent.common.protocol.restapi import VMAgentFamily, Extension, ExtensionRequestedState, ExtensionSettings
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
class ExtensionsGoalStateFromVmSettings(ExtensionsGoalState):
_MINIMUM_TIMESTAMP = datetime.datetime(1900, 1, 1, 0, 0) # min value accepted by datetime.strftime()
def __init__(self, etag, json_text, correlation_id):
super(ExtensionsGoalStateFromVmSettings, self).__init__()
self._id = "etag_{0}".format(etag)
self._etag = etag
self._svd_sequence_number = 0
self._hostga_plugin_correlation_id = correlation_id
self._text = json_text
self._host_ga_plugin_version = FlexibleVersion('0.0.0.0')
self._schema_version = FlexibleVersion('0.0.0.0')
self._activity_id = AgentGlobals.GUID_ZERO
self._correlation_id = AgentGlobals.GUID_ZERO
self._created_on_timestamp = self._MINIMUM_TIMESTAMP
self._source = None
self._status_upload_blob = None
self._status_upload_blob_type = None
self._required_features = []
self._on_hold = False
self._agent_families = []
self._extensions = []
try:
self._parse_vm_settings(json_text)
self._do_common_validations()
except Exception as e:
message = "Error parsing vmSettings [HGAP: {0} Etag:{1}]: {2}".format(self._host_ga_plugin_version, etag, ustr(e))
raise VmSettingsParseError(message, etag, self.get_redacted_text())
@property
def id(self):
return self._id
@property
def etag(self):
return self._etag
@property
def svd_sequence_number(self):
return self._svd_sequence_number
@property
def host_ga_plugin_version(self):
return self._host_ga_plugin_version
@property
def schema_version(self):
return self._schema_version
@property
def activity_id(self):
"""
The CRP activity id
"""
return self._activity_id
@property
def correlation_id(self):
"""
The correlation id for the CRP operation
"""
return self._correlation_id
@property
def hostga_plugin_correlation_id(self):
"""
The correlation id for the call to the HostGAPlugin vmSettings API
"""
return self._hostga_plugin_correlation_id
@property
def created_on_timestamp(self):
"""
Timestamp assigned by the CRP (time at which the goal state was created)
"""
return self._created_on_timestamp
@property
def channel(self):
return GoalStateChannel.HostGAPlugin
@property
def source(self):
return self._source
@property
def status_upload_blob(self):
return self._status_upload_blob
@property
def status_upload_blob_type(self):
return self._status_upload_blob_type
def _set_status_upload_blob_type(self, value):
self._status_upload_blob_type = value
@property
def required_features(self):
return self._required_features
@property
def on_hold(self):
return self._on_hold
@property
def agent_families(self):
return self._agent_families
@property
def extensions(self):
return self._extensions
def get_redacted_text(self):
return re.sub(r'("protectedSettings"\s*:\s*)"[^"]+"', r'\1"*** REDACTED ***"', self._text)
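# Example (illustrative only) of the substitution above:
#
#   input:  ... "protectedSettings": "MIIBsAYJKoZIhvcN..." ...
#   output: ... "protectedSettings": "*** REDACTED ***" ...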
def _parse_vm_settings(self, json_text):
vm_settings = _CaseFoldedDict.from_dict(json.loads(json_text))
self._parse_simple_attributes(vm_settings)
self._parse_status_upload_blob(vm_settings)
self._parse_required_features(vm_settings)
self._parse_agent_manifests(vm_settings)
self._parse_extensions(vm_settings)
def _parse_simple_attributes(self, vm_settings):
# Sample:
# {
# "hostGAPluginVersion": "1.0.8.115",
# "vmSettingsSchemaVersion": "0.0",
# "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9",
# "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e",
# "inSvdSeqNo": 1,
# "extensionsLastModifiedTickCount": 637726657706205217,
# "extensionGoalStatesSource": "FastTrack",
# ...
# }
# The HGAP version is included in some messages, so parse it first
host_ga_plugin_version = vm_settings.get("hostGAPluginVersion")
if host_ga_plugin_version is not None:
self._host_ga_plugin_version = FlexibleVersion(host_ga_plugin_version)
self._activity_id = self._string_to_id(vm_settings.get("activityId"))
self._correlation_id = self._string_to_id(vm_settings.get("correlationId"))
self._svd_sequence_number = self._string_to_id(vm_settings.get("inSvdSeqNo"))
self._created_on_timestamp = self._ticks_to_utc_timestamp(vm_settings.get("extensionsLastModifiedTickCount"))
schema_version = vm_settings.get("vmSettingsSchemaVersion")
if schema_version is not None:
self._schema_version = FlexibleVersion(schema_version)
on_hold = vm_settings.get("onHold")
if on_hold is not None:
self._on_hold = on_hold
self._source = vm_settings.get("extensionGoalStatesSource")
if self._source is None:
self._source = "UNKNOWN"
def _parse_status_upload_blob(self, vm_settings):
# Sample:
# {
# ...
# "statusUploadBlob": {
# "statusBlobType": "BlockBlob",
# "value": "https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w"
# },
# ...
# }
status_upload_blob = vm_settings.get("statusUploadBlob")
if status_upload_blob is None:
self._status_upload_blob = None
self._status_upload_blob_type = "BlockBlob"
else:
self._status_upload_blob = status_upload_blob.get("value")
if self._status_upload_blob is None:
raise Exception("Missing statusUploadBlob.value")
self._status_upload_blob_type = status_upload_blob.get("statusBlobType")
if self._status_upload_blob_type is None:
self._status_upload_blob_type = "BlockBlob"
def _parse_required_features(self, vm_settings):
# Sample:
# {
# ...
# "requiredFeatures": [
# {
# "name": "MultipleExtensionsPerHandler"
# }
# ],
# ...
# }
required_features = vm_settings.get("requiredFeatures")
if required_features is not None:
if not isinstance(required_features, list):
raise Exception("requiredFeatures should be an array (got {0})".format(required_features))
def get_required_features_names():
for feature in required_features:
name = feature.get("name")
if name is None:
raise Exception("A required feature is missing the 'name' property (got {0})".format(feature))
yield name
self._required_features.extend(get_required_features_names())
def _parse_agent_manifests(self, vm_settings):
# Sample:
# {
# ...
# "gaFamilies": [
# {
# "name": "Prod",
# "version": "9.9.9.9",
# "isVersionFromRSM": true,
# "isVMEnabledForRSMUpgrades": true,
# "uris": [
# "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml",
# "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml"
# ]
# },
# {
# "name": "Test",
# "uris": [
# "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml",
# "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml"
# ]
# }
# ],
# ...
# }
families = vm_settings.get("gaFamilies")
if families is None:
return
if not isinstance(families, list):
raise Exception("gaFamilies should be an array (got {0})".format(families))
for family in families:
name = family["name"]
version = family.get("version")
is_version_from_rsm = family.get("isVersionFromRSM")
is_vm_enabled_for_rsm_upgrades = family.get("isVMEnabledForRSMUpgrades")
uris = family.get("uris")
if uris is None:
uris = []
agent_family = VMAgentFamily(name)
agent_family.version = version
agent_family.is_version_from_rsm = is_version_from_rsm
agent_family.is_vm_enabled_for_rsm_upgrades = is_vm_enabled_for_rsm_upgrades
for u in uris:
agent_family.uris.append(u)
self._agent_families.append(agent_family)
def _parse_extensions(self, vm_settings):
# Sample (NOTE: The first sample is single-config, the second multi-config):
# {
# ...
# "extensionGoalStates": [
# {
# "name": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent",
# "version": "1.9.1",
# "location": "https://zrdfepirv2cbn04prdstr01a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml",
# "state": "enabled",
# "autoUpgrade": true,
# "runAsStartupTask": false,
# "isJson": true,
# "useExactVersion": true,
# "settingsSeqNo": 0,
# "settings": [
# {
# "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F",
# "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==",
# "publicSettings": "{\"GCS_AUTO_CONFIG\":true}"
# }
# ],
# "dependsOn": [
# ...
# ]
# },
# {
# "name": "Microsoft.CPlat.Core.RunCommandHandlerLinux",
# "version": "1.2.0",
# "location": "https://umsavbvncrpzbnxmxzmr.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml",
# "failoverlocation": "https://umsajbjtqrb3zqjvgb2z.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml",
# "additionalLocations": [
# "https://umsawqtlsshtn5v2nfgh.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml"
# ],
# "state": "enabled",
# "autoUpgrade": true,
# "runAsStartupTask": false,
# "isJson": true,
# "useExactVersion": true,
# "settingsSeqNo": 0,
# "isMultiConfig": true,
# "settings": [
# {
# "publicSettings": "{\"source\":{\"script\":\"echo '4abb1e88-f349-41f8-8442-247d9fdfcac5'\"}}",
# "seqNo": 0,
# "extensionName": "MCExt1",
# "extensionState": "enabled"
# },
# {
# "publicSettings": "{\"source\":{\"script\":\"echo 'e865c9bc-a7b3-42c6-9a79-cfa98a1ee8b3'\"}}",
# "seqNo": 0,
# "extensionName": "MCExt2",
# "extensionState": "enabled"
# },
# {
# "publicSettings": "{\"source\":{\"script\":\"echo 'f923e416-0340-485c-9243-8b84fb9930c6'\"}}",
# "seqNo": 0,
# "extensionName": "MCExt3",
# "extensionState": "enabled"
# }
# ],
# "dependsOn": [
# ...
# ]
# }
# ...
# ]
# ...
# }
extension_goal_states = vm_settings.get("extensionGoalStates")
if extension_goal_states is not None:
if not isinstance(extension_goal_states, list):
raise Exception("extension_goal_states should be an array (got {0})".format(type(extension_goal_states))) # report only the type, since the value may contain secrets
for extension_gs in extension_goal_states:
extension = Extension()
extension.name = extension_gs['name']
extension.version = extension_gs['version']
extension.state = extension_gs['state']
if extension.state not in ExtensionRequestedState.All:
raise Exception('Invalid extension state: {0} ({1})'.format(extension.state, extension.name))
is_multi_config = extension_gs.get('isMultiConfig')
if is_multi_config is not None:
extension.supports_multi_config = is_multi_config
location = extension_gs.get('location')
if location is not None:
extension.manifest_uris.append(location)
fail_over_location = extension_gs.get('failoverLocation')
if fail_over_location is not None:
extension.manifest_uris.append(fail_over_location)
additional_locations = extension_gs.get('additionalLocations')
if additional_locations is not None:
if not isinstance(additional_locations, list):
raise Exception('additionalLocations should be an array (got {0})'.format(additional_locations))
extension.manifest_uris.extend(additional_locations)
#
# Settings
#
settings_list = extension_gs.get('settings')
if settings_list is not None:
if not isinstance(settings_list, list):
raise Exception("'settings' should be an array (extension: {0})".format(extension.name))
if not extension.supports_multi_config and len(settings_list) > 1:
raise Exception("Single-config extension includes multiple settings (extension: {0})".format(extension.name))
for s in settings_list:
settings = ExtensionSettings()
public_settings = s.get('publicSettings')
# Note that publicSettings, protectedSettings and protectedSettingsCertThumbprint can be None; do not change this to, for example,
# empty, since those values are serialized to the extension's status file and extensions may depend on the current implementation
# (for example, no public settings would currently be serialized as '"publicSettings": null')
settings.publicSettings = None if public_settings is None else json.loads(public_settings)
settings.protectedSettings = s.get('protectedSettings')
thumbprint = s.get('protectedSettingsCertThumbprint')
if thumbprint is None and settings.protectedSettings is not None:
raise Exception("The certificate thumbprint for protected settings is missing (extension: {0})".format(extension.name))
settings.certificateThumbprint = thumbprint
# in multi-config, each settings node has its own name, sequence number and state
if extension.supports_multi_config:
settings.name = s['extensionName']
settings.sequenceNumber = s['seqNo']
settings.state = s['extensionState']
else:
settings.name = extension.name
settings.sequenceNumber = extension_gs['settingsSeqNo']
settings.state = extension.state
extension.settings.append(settings)
#
# Dependency level
#
depends_on = extension_gs.get("dependsOn")
if depends_on is not None:
self._parse_dependency_level(depends_on, extension)
self._extensions.append(extension)
@staticmethod
def _parse_dependency_level(depends_on, extension):
# Sample (NOTE: The first sample is single-config, the second multi-config):
# {
# ...
# "extensionGoalStates": [
# {
# "name": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent",
# ...
# "settings": [
# ...
# ],
# "dependsOn": [
# {
# "DependsOnExtension": [
# {
# "handler": "Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent"
# }
# ],
# "dependencyLevel": 1
# }
# ]
# },
# {
# "name": "Microsoft.CPlat.Core.RunCommandHandlerLinux",
# ...
# "isMultiConfig": true,
# "settings": [
# {
# ...
# "extensionName": "MCExt1",
# },
# {
# ...
# "extensionName": "MCExt2",
# },
# {
# ...
# "extensionName": "MCExt3",
# }
# ],
# "dependsOn": [
# {
# "dependsOnExtension": [
# {
# "extension": "...",
# "handler": "..."
# },
# {
# "extension": "...",
# "handler": "..."
# }
# ],
# "dependencyLevel": 2,
# "name": "MCExt1"
# },
# {
# "dependsOnExtension": [
# {
# "extension": "...",
# "handler": "..."
# }
# ],
# "dependencyLevel": 1,
# "name": "MCExt2"
# }
# ...
# ]
# ...
# }
if not isinstance(depends_on, list):
raise Exception('dependsOn should be an array ({0}) (got {1})'.format(extension.name, depends_on))
if not extension.supports_multi_config:
# single-config
length = len(depends_on)
if length > 1:
raise Exception('dependsOn should be an array with exactly one item for single-config extensions ({0}) (got {1})'.format(extension.name, depends_on))
if length == 0:
logger.warn('dependsOn is an empty array for extension {0}; setting the dependency level to 0'.format(extension.name))
dependency_level = 0
else:
dependency_level = depends_on[0]['dependencyLevel']
depends_on_extension = depends_on[0].get('dependsOnExtension')
if depends_on_extension is None:
# TODO: Consider removing this check and its telemetry after a few releases if we do not receive any telemetry indicating
# that dependsOnExtension is actually missing from the vmSettings
message = 'Missing dependsOnExtension on extension {0}'.format(extension.name)
logger.warn(message)
add_event(WALAEventOperation.ProvisionAfterExtensions, message=message, is_success=False, log_event=False)
else:
message = '{0} depends on {1}'.format(extension.name, depends_on_extension)
logger.info(message)
add_event(WALAEventOperation.ProvisionAfterExtensions, message=message, is_success=True, log_event=False)
if len(extension.settings) == 0:
message = 'Extension {0} does not have any settings. Will ignore dependency (dependency level: {1})'.format(extension.name, dependency_level)
logger.warn(message)
add_event(WALAEventOperation.ProvisionAfterExtensions, message=message, is_success=False, log_event=False)
else:
extension.settings[0].dependencyLevel = dependency_level
else:
# multi-config
settings_by_name = {}
for settings in extension.settings:
settings_by_name[settings.name] = settings
for dependency in depends_on:
settings = settings_by_name.get(dependency["name"])
if settings is None:
raise Exception("Dependency '{0}' does not correspond to any of the settings in the extension (settings: {1})".format(dependency["name"], settings_by_name.keys()))
settings.dependencyLevel = dependency["dependencyLevel"]
#
# TODO: The current implementation of the vmSettings API uses inconsistent cases on the names of the json items it returns.
# To work around that, we use _CaseFoldedDict to query those json items in a case-insensitive manner. Do not use
# _CaseFoldedDict for other purposes. Remove it once the vmSettings API is updated.
#
class _CaseFoldedDict(dict):
@staticmethod
def from_dict(dictionary):
case_folded = _CaseFoldedDict()
for key, value in dictionary.items():
case_folded[key] = _CaseFoldedDict._to_case_folded_dict_item(value)
return case_folded
def get(self, key):
return super(_CaseFoldedDict, self).get(_casefold(key))
def has_key(self, key):
return super(_CaseFoldedDict, self).__contains__(_casefold(key))  # return a boolean rather than the value
def __getitem__(self, key):
return super(_CaseFoldedDict, self).__getitem__(_casefold(key))
def __setitem__(self, key, value):
return super(_CaseFoldedDict, self).__setitem__(_casefold(key), value)
def __contains__(self, key):
return super(_CaseFoldedDict, self).__contains__(_casefold(key))
@staticmethod
def _to_case_folded_dict_item(item):
if isinstance(item, dict):
case_folded_dict = _CaseFoldedDict()
for key, value in item.items():
case_folded_dict[_casefold(key)] = _CaseFoldedDict._to_case_folded_dict_item(value)
return case_folded_dict
if isinstance(item, list):
return [_CaseFoldedDict._to_case_folded_dict_item(list_item) for list_item in item]
return item
def copy(self):
raise NotImplementedError()
@staticmethod
def fromkeys(*args, **kwargs):
raise NotImplementedError()
def pop(self, key, default=None):
raise NotImplementedError()
def setdefault(self, key, default=None):
raise NotImplementedError()
def update(self, E=None, **F): # known special case of dict.update
raise NotImplementedError()
def __delitem__(self, *args, **kwargs):
raise NotImplementedError()
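# Example (illustrative only) of case-insensitive lookups with _CaseFoldedDict:
#
#   d = _CaseFoldedDict.from_dict({"statusUploadBlob": {"statusBlobType": "BlockBlob"}})
#   d.get("StatusUploadBlob").get("statusblobtype")  # -> "BlockBlob", regardless of casing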
# casefold() does not exist on Python 2 so we use lower() there
def _casefold(string):
if sys.version_info[0] == 2:
return type(string).lower(string) # the type of "string" can be unicode or str
# Class 'str' has no 'casefold' member (no-member) -- Disabled: This warning shows up on Python 2.7 pylint runs
# but this code is actually not executed on Python 2.
return str.casefold(string) # pylint: disable=no-member
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/goal_state.py 0000664 0000000 0000000 00000103452 14626177470 0026607 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import datetime
import os
import re
import time
import json
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.datacontract import set_properties
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory
from azurelinuxagent.common.protocol.extensions_goal_state import VmSettingsParseError, GoalStateSource
from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported, VmSettingsSupportStopped
from azurelinuxagent.common.protocol.restapi import Cert, CertList, RemoteAccessUser, RemoteAccessUsersList, ExtHandlerPackage, ExtHandlerPackageList
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.common.utils.archive import GoalStateHistory, SHARED_CONF_FILE_NAME
from azurelinuxagent.common.utils.cryptutil import CryptUtil
from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext, getattrib, gettext
GOAL_STATE_URI = "http://{0}/machine/?comp=goalstate"
CERTS_FILE_NAME = "Certificates.xml"
P7M_FILE_NAME = "Certificates.p7m"
PEM_FILE_NAME = "Certificates.pem"
TRANSPORT_CERT_FILE_NAME = "TransportCert.pem"
TRANSPORT_PRV_FILE_NAME = "TransportPrivate.pem"
_GET_GOAL_STATE_MAX_ATTEMPTS = 6
class GoalStateProperties(object):
"""
Enum for defining the properties that we fetch in the goal state
"""
RoleConfig = 0x1
HostingEnv = 0x2
SharedConfig = 0x4
ExtensionsGoalState = 0x8
Certificates = 0x10
RemoteAccessInfo = 0x20
All = RoleConfig | HostingEnv | SharedConfig | ExtensionsGoalState | Certificates | RemoteAccessInfo
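# Example (illustrative only): the values above are bit flags, combined and tested
# with bitwise operators:
#
#   properties = GoalStateProperties.RoleConfig | GoalStateProperties.Certificates
#   if properties & GoalStateProperties.Certificates:
#       pass  # Certificates were requested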
class GoalStateInconsistentError(ProtocolError):
"""
Indicates an inconsistency in the goal state (e.g. missing tenant certificate)
"""
def __init__(self, msg, inner=None):
super(GoalStateInconsistentError, self).__init__(msg, inner)
class GoalState(object):
def __init__(self, wire_client, goal_state_properties=GoalStateProperties.All, silent=False, save_to_history=False):
"""
Fetches the goal state using the given wire client.
Fetching the goal state involves several HTTP requests to the WireServer and the HostGAPlugin. There is an initial request to WireServer's goalstate API,
whose response includes the incarnation, role instance, container ID, role config, and URIs to the rest of the goal state (ExtensionsConfig, Certificates,
Remote Access users, etc.). Additional requests are done using those URIs (all of them point to APIs in the WireServer). Additionally, there is a
request to the HostGAPlugin for the vmSettings, which determines the goal state for extensions when using the Fast Track pipeline.
To reduce the number of requests, when possible, create a single instance of GoalState and use the update() method to keep it up to date.
"""
try:
self._wire_client = wire_client
self._history = None
self._save_to_history = save_to_history
self._extensions_goal_state = None # populated from vmSettings or extensionsConfig
self._goal_state_properties = goal_state_properties
self.logger = logger.Logger(logger.DEFAULT_LOGGER)
self.logger.silent = silent
# These properties hold the goal state from the WireServer and are initialized by self._fetch_full_wire_server_goal_state()
self._incarnation = None
self._role_instance_id = None
self._role_config_name = None
self._container_id = None
self._hosting_env = None
self._shared_conf = None
self._certs = EmptyCertificates()
self._certs_uri = None
self._remote_access = None
self.update(silent=silent)
except ProtocolError:
raise
except Exception as exception:
# We don't log the error here since fetching the goal state is done every few seconds
raise ProtocolError(msg="Error fetching goal state", inner=exception)
@property
def incarnation(self):
return self._incarnation
@property
def container_id(self):
if not self._goal_state_properties & GoalStateProperties.RoleConfig:
raise ProtocolError("ContainerId is not in goal state properties")
else:
return self._container_id
@property
def role_instance_id(self):
if not self._goal_state_properties & GoalStateProperties.RoleConfig:
raise ProtocolError("RoleInstanceId is not in goal state properties")
else:
return self._role_instance_id
@property
def role_config_name(self):
if not self._goal_state_properties & GoalStateProperties.RoleConfig:
raise ProtocolError("RoleConfig is not in goal state properties")
else:
return self._role_config_name
@property
def extensions_goal_state(self):
if not self._goal_state_properties & GoalStateProperties.ExtensionsGoalState:
raise ProtocolError("ExtensionsGoalState is not in goal state properties")
else:
return self._extensions_goal_state
@property
def certs(self):
if not self._goal_state_properties & GoalStateProperties.Certificates:
raise ProtocolError("Certificates is not in goal state properties")
else:
return self._certs
@property
def hosting_env(self):
if not self._goal_state_properties & GoalStateProperties.HostingEnv:
raise ProtocolError("HostingEnvironment is not in goal state properties")
else:
return self._hosting_env
@property
def shared_conf(self):
if not self._goal_state_properties & GoalStateProperties.SharedConfig:
raise ProtocolError("SharedConfig is not in goal state properties")
else:
return self._shared_conf
@property
def remote_access(self):
if not self._goal_state_properties & GoalStateProperties.RemoteAccessInfo:
raise ProtocolError("RemoteAccessInfo is not in goal state properties")
else:
return self._remote_access
def fetch_agent_manifest(self, family_name, uris):
"""
This is a convenience method that wraps WireClient.fetch_manifest(), but adds the required 'use_verify_header' parameter and saves
the manifest to the history folder.
"""
return self._fetch_manifest("agent", "waagent.{0}".format(family_name), uris)
def fetch_extension_manifest(self, extension_name, uris):
"""
This is a convenience method that wraps WireClient.fetch_manifest(), but adds the required 'use_verify_header' parameter and saves
the manifest to the history folder.
"""
return self._fetch_manifest("extension", extension_name, uris)
def _fetch_manifest(self, manifest_type, name, uris):
try:
is_fast_track = self.extensions_goal_state.source == GoalStateSource.FastTrack
xml_text = self._wire_client.fetch_manifest(manifest_type, uris, use_verify_header=is_fast_track)
if self._save_to_history:
self._history.save_manifest(name, xml_text)
return ExtensionManifest(xml_text)
except Exception as e:
raise ProtocolError("Failed to retrieve {0} manifest. Error: {1}".format(manifest_type, ustr(e)))
@staticmethod
def update_host_plugin_headers(wire_client):
"""
Updates the container ID and role config name that are sent in the headers of HTTP requests to the HostGAPlugin
"""
# Fetching the goal state updates the HostGAPlugin so simply trigger the request
GoalState._fetch_goal_state(wire_client)
def update(self, silent=False):
"""
Updates the current GoalState instance fetching values from the WireServer/HostGAPlugin as needed
"""
self.logger.silent = silent
try:
self._update(force_update=False)
except GoalStateInconsistentError as e:
message = "Detected an inconsistency in the goal state: {0}".format(ustr(e))
self.logger.warn(message)
add_event(op=WALAEventOperation.GoalState, is_success=False, message=message)
self._update(force_update=True)
message = "The goal state is consistent"
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
def _update(self, force_update):
#
# Fetch the goal state from both the HGAP and the WireServer
#
timestamp = datetime.datetime.utcnow()
if force_update:
message = "Refreshing goal state and vmSettings"
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
incarnation, xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client)
goal_state_updated = force_update or incarnation != self._incarnation
if goal_state_updated:
message = 'Fetched a new incarnation for the WireServer goal state [incarnation {0}]'.format(incarnation)
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
vm_settings, vm_settings_updated = None, False
if self._goal_state_properties & GoalStateProperties.ExtensionsGoalState:
try:
vm_settings, vm_settings_updated = GoalState._fetch_vm_settings(self._wire_client, force_update=force_update)
except VmSettingsSupportStopped as exception: # If the HGAP stopped supporting vmSettings, we need to use the goal state from the WireServer
self._restore_wire_server_goal_state(incarnation, xml_text, xml_doc, exception)
return
if vm_settings_updated:
self.logger.info('')
message = "Fetched new vmSettings [HostGAPlugin correlation ID: {0} eTag: {1} source: {2}]".format(vm_settings.hostga_plugin_correlation_id, vm_settings.etag, vm_settings.source)
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
# Ignore the vmSettings if their source is Fabric (processing a Fabric goal state may require the tenant certificate and the vmSettings don't include it.)
if vm_settings is not None and vm_settings.source == GoalStateSource.Fabric:
if vm_settings_updated:
message = "The vmSettings originated via Fabric; will ignore them."
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
vm_settings, vm_settings_updated = None, False
# If neither goal state has changed we are done with the update
if not goal_state_updated and not vm_settings_updated:
return
# Start a new history subdirectory and capture the updated goal state
tag = "{0}".format(incarnation) if vm_settings is None else "{0}-{1}".format(incarnation, vm_settings.etag)
if self._save_to_history:
self._history = GoalStateHistory(timestamp, tag)
if goal_state_updated:
self._history.save_goal_state(xml_text)
if vm_settings_updated:
self._history.save_vm_settings(vm_settings.get_redacted_text())
#
# Continue fetching the rest of the goal state
#
extensions_config = None
if goal_state_updated:
extensions_config = self._fetch_full_wire_server_goal_state(incarnation, xml_doc)
#
# Lastly, decide whether to use the vmSettings or extensionsConfig for the extensions goal state
#
if goal_state_updated and vm_settings_updated:
most_recent = vm_settings if vm_settings.created_on_timestamp > extensions_config.created_on_timestamp else extensions_config
elif goal_state_updated:
most_recent = extensions_config
else: # vm_settings_updated
most_recent = vm_settings
if self._extensions_goal_state is None or most_recent.created_on_timestamp >= self._extensions_goal_state.created_on_timestamp:
self._extensions_goal_state = most_recent
#
# For Fast Track goal states, verify that the required certificates are in the goal state.
#
# Some scenarios can produce inconsistent goal states. For example, during hibernation/resume, the Fabric goal state changes (the
# tenant certificate is re-generated when the VM is restarted) *without* the incarnation necessarily changing (e.g. if the incarnation
# is 1 before the hibernation; on resume the incarnation is set to 1 even though the goal state has a new certificate). If a Fast
# Track goal state comes after that, the extensions will need the new certificate. The Agent needs to refresh the goal state in that
# case, to ensure it fetches the new certificate.
#
if self._extensions_goal_state.source == GoalStateSource.FastTrack and self._goal_state_properties & GoalStateProperties.Certificates:
self._check_certificates()
self._check_and_download_missing_certs_on_disk()
def _check_certificates(self):
# Check that the certificates needed by the extensions are present in the goal state (certs.summary)
for extension in self.extensions_goal_state.extensions:
for settings in extension.settings:
if settings.protectedSettings is None:
continue
certificates = self.certs.summary
if not any(settings.certificateThumbprint == c['thumbprint'] for c in certificates):
message = "Certificate {0} needed by {1} is missing from the goal state".format(settings.certificateThumbprint, extension.name)
raise GoalStateInconsistentError(message)
def _download_certificates(self, certs_uri):
xml_text = self._wire_client.fetch_config(certs_uri, self._wire_client.get_header_for_cert())
certs = Certificates(xml_text, self.logger)
# Log and save the certificates summary (i.e. the thumbprint but not the certificate itself) to the goal state history
for c in certs.summary:
message = "Downloaded certificate {0}".format(c)
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
if len(certs.warnings) > 0:
self.logger.warn(certs.warnings)
add_event(op=WALAEventOperation.GoalState, message=certs.warnings)
if self._save_to_history:
self._history.save_certificates(json.dumps(certs.summary))
return certs
def _check_and_download_missing_certs_on_disk(self):
# Re-download certificates if any have been removed from disk since last download
if self._certs_uri is not None:
certificates = self.certs.summary
certs_missing_from_disk = False
for c in certificates:
cert_path = os.path.join(conf.get_lib_dir(), c['thumbprint'] + '.crt')
if not os.path.isfile(cert_path):
certs_missing_from_disk = True
message = "Certificate required by goal state is not on disk: {0}".format(cert_path)
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
if certs_missing_from_disk:
# Try to re-download certs. Sometimes download may fail if certs_uri is outdated/contains wrong
# container id (for example, when the VM is moved to a new container after resuming from
# hibernation). If download fails we should report and continue with goal state processing, as some
# extensions in the goal state may succeed.
try:
self._download_certificates(self._certs_uri)
except Exception as e:
message = "Unable to download certificates. Goal state processing will continue, some " \
"extensions requiring certificates may fail. Error: {0}".format(ustr(e))
self.logger.warn(message)
add_event(op=WALAEventOperation.GoalState, is_success=False, message=message)
def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_settings_support_stopped_error):
msg = 'The HGAP stopped supporting vmSettings; will fetch the goal state from the WireServer.'
self.logger.info(msg)
add_event(op=WALAEventOperation.VmSettings, message=msg)
if self._save_to_history:
self._history = GoalStateHistory(datetime.datetime.utcnow(), incarnation)
self._history.save_goal_state(xml_text)
self._extensions_goal_state = self._fetch_full_wire_server_goal_state(incarnation, xml_doc)
if self._extensions_goal_state.created_on_timestamp < vm_settings_support_stopped_error.timestamp:
self._extensions_goal_state.is_outdated = True
msg = "Fetched a Fabric goal state older than the most recent FastTrack goal state; will skip it.\nFabric: {0}\nFastTrack: {1}".format(
self._extensions_goal_state.created_on_timestamp, vm_settings_support_stopped_error.timestamp)
self.logger.info(msg)
add_event(op=WALAEventOperation.VmSettings, message=msg)
def save_to_history(self, data, file_name):
if self._save_to_history:
self._history.save(data, file_name)
@staticmethod
def _fetch_goal_state(wire_client):
"""
Issues an HTTP request for the goal state (WireServer) and returns a tuple containing the response as text and as an XML Document
"""
uri = GOAL_STATE_URI.format(wire_client.get_endpoint())
# In some environments a few goal state requests return a missing RoleInstance; these retries are used to work around that issue
# TODO: Consider retrying on 410 (ResourceGone) as well
incarnation = "unknown"
for _ in range(0, _GET_GOAL_STATE_MAX_ATTEMPTS):
xml_text = wire_client.fetch_config(uri, wire_client.get_header())
xml_doc = parse_doc(xml_text)
incarnation = findtext(xml_doc, "Incarnation")
role_instance = find(xml_doc, "RoleInstance")
if role_instance:
break
time.sleep(0.5)
else:
raise ProtocolError("Fetched goal state without a RoleInstance [incarnation {inc}]".format(inc=incarnation))
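# Note: the "else" above belongs to the "for" loop; it executes only when all
# _GET_GOAL_STATE_MAX_ATTEMPTS iterations complete without a "break", i.e. when no
# response included a RoleInstance.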
# Telemetry and the HostGAPlugin depend on the container id/role config; keep them up-to-date each time we fetch the goal state
# (note that these elements can change even if the incarnation of the goal state does not change)
container = find(xml_doc, "Container")
container_id = findtext(container, "ContainerId")
role_config = find(role_instance, "Configuration")
role_config_name = findtext(role_config, "ConfigName")
AgentGlobals.update_container_id(container_id) # Telemetry uses this global to pick up the container id
wire_client.update_host_plugin(container_id, role_config_name)
return incarnation, xml_text, xml_doc
@staticmethod
def _fetch_vm_settings(wire_client, force_update=False):
"""
Issues an HTTP request (HostGAPlugin) for the vm settings and returns the response as an ExtensionsGoalState.
"""
vm_settings, vm_settings_updated = (None, False)
if conf.get_enable_fast_track():
try:
try:
vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update)
except ResourceGoneError:
# retry after refreshing the HostGAPlugin
GoalState.update_host_plugin_headers(wire_client)
vm_settings, vm_settings_updated = wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update)
except VmSettingsSupportStopped:
raise
except VmSettingsNotSupported:
pass
except VmSettingsParseError as exception:
# ensure we save the vmSettings if there were parsing errors, but save them only once per ETag
if not GoalStateHistory.tag_exists(exception.etag):
GoalStateHistory(datetime.datetime.utcnow(), exception.etag).save_vm_settings(exception.vm_settings_text)
raise
return vm_settings, vm_settings_updated
def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc):
"""
Issues HTTP requests (to the WireServer) for each of the URIs in the goal state (ExtensionsConfig, Certificate, Remote Access users, etc)
and populates the corresponding properties.
Returns the value of ExtensionsConfig.
"""
try:
self.logger.info('')
message = 'Fetching full goal state from the WireServer [incarnation {0}]'.format(incarnation)
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
role_instance_id = None
role_config_name = None
container_id = None
if GoalStateProperties.RoleConfig & self._goal_state_properties:
role_instance = find(xml_doc, "RoleInstance")
role_instance_id = findtext(role_instance, "InstanceId")
role_config = find(role_instance, "Configuration")
role_config_name = findtext(role_config, "ConfigName")
container = find(xml_doc, "Container")
container_id = findtext(container, "ContainerId")
extensions_config_uri = findtext(xml_doc, "ExtensionsConfig")
if not (GoalStateProperties.ExtensionsGoalState & self._goal_state_properties) or extensions_config_uri is None:
extensions_config = ExtensionsGoalStateFactory.create_empty(incarnation)
else:
xml_text = self._wire_client.fetch_config(extensions_config_uri, self._wire_client.get_header())
extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(incarnation, xml_text, self._wire_client)
if self._save_to_history:
self._history.save_extensions_config(extensions_config.get_redacted_text())
hosting_env = None
if GoalStateProperties.HostingEnv & self._goal_state_properties:
hosting_env_uri = findtext(xml_doc, "HostingEnvironmentConfig")
xml_text = self._wire_client.fetch_config(hosting_env_uri, self._wire_client.get_header())
hosting_env = HostingEnv(xml_text)
if self._save_to_history:
self._history.save_hosting_env(xml_text)
shared_config = None
if GoalStateProperties.SharedConfig & self._goal_state_properties:
shared_conf_uri = findtext(xml_doc, "SharedConfig")
xml_text = self._wire_client.fetch_config(shared_conf_uri, self._wire_client.get_header())
shared_config = SharedConfig(xml_text)
if self._save_to_history:
self._history.save_shared_conf(xml_text)
# SharedConfig.xml is used by other components (Azsec and Singularity/HPC Infiniband), so save it to the agent's root directory as well
shared_config_file = os.path.join(conf.get_lib_dir(), SHARED_CONF_FILE_NAME)
try:
fileutil.write_file(shared_config_file, xml_text)
except Exception as e:
logger.warn("Failed to save {0}: {1}".format(shared_config, e))
certs = EmptyCertificates()
certs_uri = findtext(xml_doc, "Certificates")
if (GoalStateProperties.Certificates & self._goal_state_properties) and certs_uri is not None:
certs = self._download_certificates(certs_uri)
remote_access = None
if GoalStateProperties.RemoteAccessInfo & self._goal_state_properties:
remote_access_uri = findtext(container, "RemoteAccessInfo")
if remote_access_uri is not None:
xml_text = self._wire_client.fetch_config(remote_access_uri, self._wire_client.get_header_for_cert())
remote_access = RemoteAccess(xml_text)
if self._save_to_history:
self._history.save_remote_access(xml_text)
self._incarnation = incarnation
self._role_instance_id = role_instance_id
self._role_config_name = role_config_name
self._container_id = container_id
self._hosting_env = hosting_env
self._shared_conf = shared_config
self._certs = certs
self._certs_uri = certs_uri
self._remote_access = remote_access
return extensions_config
except Exception as exception:
self.logger.warn("Fetching the goal state failed: {0}", ustr(exception))
raise ProtocolError(msg="Error fetching goal state", inner=exception)
finally:
message = 'Fetch goal state completed'
self.logger.info(message)
add_event(op=WALAEventOperation.GoalState, message=message)
class HostingEnv(object):
def __init__(self, xml_text):
self.xml_text = xml_text
xml_doc = parse_doc(xml_text)
incarnation = find(xml_doc, "Incarnation")
self.vm_name = getattrib(incarnation, "instance")
role = find(xml_doc, "Role")
self.role_name = getattrib(role, "name")
deployment = find(xml_doc, "Deployment")
self.deployment_name = getattrib(deployment, "name")
class SharedConfig(object):
def __init__(self, xml_text):
self.xml_text = xml_text
class Certificates(object):
def __init__(self, xml_text, my_logger):
self.cert_list = CertList()
self.summary = [] # debugging info
self.warnings = []
# Save the certificates
local_file = os.path.join(conf.get_lib_dir(), CERTS_FILE_NAME)
fileutil.write_file(local_file, xml_text)
# Separate the certificates into individual files.
xml_doc = parse_doc(xml_text)
data = findtext(xml_doc, "Data")
if data is None:
return
# If the certificate format is not Pkcs7BlobWithPfxContents, do not parse it
certificate_format = findtext(xml_doc, "Format")
if certificate_format and certificate_format != "Pkcs7BlobWithPfxContents":
message = "The Format is not Pkcs7BlobWithPfxContents. Format is {0}".format(certificate_format)
my_logger.warn(message)
add_event(op=WALAEventOperation.GoalState, message=message)
return
cryptutil = CryptUtil(conf.get_openssl_cmd())
p7m_file = os.path.join(conf.get_lib_dir(), P7M_FILE_NAME)
p7m = ("MIME-Version:1.0\n" # pylint: disable=W1308
"Content-Disposition: attachment; filename=\"{0}\"\n"
"Content-Type: application/x-pkcs7-mime; name=\"{1}\"\n"
"Content-Transfer-Encoding: base64\n"
"\n"
"{2}").format(p7m_file, p7m_file, data)
fileutil.write_file(p7m_file, p7m)
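# For reference, the file written above is a minimal S/MIME envelope; with an
# illustrative lib dir and file name (the actual values come from conf and the
# P7M_FILE_NAME constant) it looks like:
#   MIME-Version:1.0
#   Content-Disposition: attachment; filename="/var/lib/waagent/Certificates.p7m"
#   Content-Type: application/x-pkcs7-mime; name="/var/lib/waagent/Certificates.p7m"
#   Content-Transfer-Encoding: base64
#
#   <base64 payload from the goal state's Data element>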
trans_prv_file = os.path.join(conf.get_lib_dir(), TRANSPORT_PRV_FILE_NAME)
trans_cert_file = os.path.join(conf.get_lib_dir(), TRANSPORT_CERT_FILE_NAME)
pem_file = os.path.join(conf.get_lib_dir(), PEM_FILE_NAME)
# decrypt certificates
cryptutil.decrypt_p7m(p7m_file, trans_prv_file, trans_cert_file, pem_file)
# The parsing process uses the public key to match each private key (prv) with its certificate (crt).
buf = []
prvs = {}
thumbprints = {}
index = 0
v1_cert_list = []
with open(pem_file) as pem:
for line in pem.readlines():
buf.append(line)
if re.match(r'[-]+END.*KEY[-]+', line):
tmp_file = Certificates._write_to_tmp_file(index, 'prv', buf)
pub = cryptutil.get_pubkey_from_prv(tmp_file)
prvs[pub] = tmp_file
buf = []
index += 1
elif re.match(r'[-]+END.*CERTIFICATE[-]+', line):
tmp_file = Certificates._write_to_tmp_file(index, 'crt', buf)
pub = cryptutil.get_pubkey_from_crt(tmp_file)
thumbprint = cryptutil.get_thumbprint_from_crt(tmp_file)
thumbprints[pub] = thumbprint
# Rename crt with thumbprint as the file name
crt = "{0}.crt".format(thumbprint)
v1_cert_list.append({
"name": None,
"thumbprint": thumbprint
})
os.rename(tmp_file, os.path.join(conf.get_lib_dir(), crt))
buf = []
index += 1
# Rename prv key with thumbprint as the file name
for pubkey in prvs:
thumbprint = thumbprints[pubkey]
if thumbprint:
tmp_file = prvs[pubkey]
prv = "{0}.prv".format(thumbprint)
os.rename(tmp_file, os.path.join(conf.get_lib_dir(), prv))
else:
# Since this private key has no matching certificate,
# it cannot be renamed with a thumbprint
self.warnings.append("Found NO matching cert/thumbprint for private key!")
for pubkey, thumbprint in thumbprints.items():
has_private_key = pubkey in prvs
self.summary.append({"thumbprint": thumbprint, "hasPrivateKey": has_private_key})
for v1_cert in v1_cert_list:
cert = Cert()
set_properties("certs", cert, v1_cert)
self.cert_list.certificates.append(cert)
@staticmethod
def _write_to_tmp_file(index, suffix, buf):
file_name = os.path.join(conf.get_lib_dir(), "{0}.{1}".format(index, suffix))
fileutil.write_file(file_name, "".join(buf))
return file_name
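# Illustrative walk-through of the pairing logic above (hypothetical PEM with
# one key and one certificate sharing public key P1 and thumbprint T1):
#   entry 0 ends with "-----END ... KEY-----"         -> written to "0.prv"; prvs[P1] = "0.prv"
#   entry 1 ends with "-----END ... CERTIFICATE-----" -> thumbprints[P1] = T1; renamed to "T1.crt"
# The loop over prvs then renames "0.prv" to "T1.prv", so the key and its
# certificate end up sharing the thumbprint as their file-name stem.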
class EmptyCertificates:
def __init__(self):
self.cert_list = CertList()
self.summary = [] # debugging info
self.warnings = []
class RemoteAccess(object):
"""
Object containing information about user accounts
"""
#
# <RemoteAccess>
#   <Version/>
#   <Incarnation/>
#   <Users>
#     <User>
#       <Name/>
#       <Password/>
#       <Expiration/>
#     </User>
#   </Users>
# </RemoteAccess>
#
def __init__(self, xml_text):
self.xml_text = xml_text
self.version = None
self.incarnation = None
self.user_list = RemoteAccessUsersList()
if self.xml_text is None or len(self.xml_text) == 0:
return
xml_doc = parse_doc(self.xml_text)
self.version = findtext(xml_doc, "Version")
self.incarnation = findtext(xml_doc, "Incarnation")
user_collection = find(xml_doc, "Users")
users = findall(user_collection, "User")
for user in users:
remote_access_user = RemoteAccess._parse_user(user)
self.user_list.users.append(remote_access_user)
@staticmethod
def _parse_user(user):
name = findtext(user, "Name")
encrypted_password = findtext(user, "Password")
expiration = findtext(user, "Expiration")
remote_access_user = RemoteAccessUser(name, encrypted_password, expiration)
return remote_access_user
class ExtensionManifest(object):
def __init__(self, xml_text):
if xml_text is None:
raise ValueError("ExtensionManifest is None")
logger.verbose("Load ExtensionManifest.xml")
self.pkg_list = ExtHandlerPackageList()
self._parse(xml_text)
def _parse(self, xml_text):
xml_doc = parse_doc(xml_text)
self._handle_packages(findall(find(xml_doc, "Plugins"), "Plugin"), False)
self._handle_packages(findall(find(xml_doc, "InternalPlugins"), "Plugin"), True)
def _handle_packages(self, packages, isinternal):
for package in packages:
version = findtext(package, "Version")
disallow_major_upgrade = findtext(package,
"DisallowMajorVersionUpgrade")
if disallow_major_upgrade is None:
disallow_major_upgrade = ''
disallow_major_upgrade = disallow_major_upgrade.lower() == "true"
uris = find(package, "Uris")
uri_list = findall(uris, "Uri")
uri_list = [gettext(x) for x in uri_list]
pkg = ExtHandlerPackage()
pkg.version = version
pkg.disallow_major_upgrade = disallow_major_upgrade
for uri in uri_list:
pkg.uris.append(uri)
pkg.isinternal = isinternal
self.pkg_list.versions.append(pkg)
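# A manifest fragment in the shape parsed above (element names are the ones the
# code looks up; the root element name and all values are hypothetical):
#   <PluginVersionManifest>
#     <Plugins>
#       <Plugin>
#         <Version>1.0.0</Version>
#         <DisallowMajorVersionUpgrade>true</DisallowMajorVersionUpgrade>
#         <Uris><Uri>https://host/extension_1.0.0.zip</Uri></Uris>
#       </Plugin>
#     </Plugins>
#     <InternalPlugins> ... same Plugin elements, marked isinternal ... </InternalPlugins>
#   </PluginVersionManifest>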
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/healthservice.py 0000664 0000000 0000000 00000015233 14626177470 0027312 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import json
from azurelinuxagent.common import logger
from azurelinuxagent.common.exception import HttpError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.utils import restutil
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
class Observation(object):
def __init__(self, name, is_healthy, description='', value=''):
if name is None:
raise ValueError("Observation name must be provided")
if is_healthy is None:
raise ValueError("Observation health must be provided")
if value is None:
value = ''
if description is None:
description = ''
self.name = name
self.is_healthy = is_healthy
self.description = description
self.value = value
@property
def as_obj(self):
return {
"ObservationName": self.name[:64],
"IsHealthy": self.is_healthy,
"Description": self.description[:128],
"Value": self.value[:128]
}
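# Illustrative truncation behavior (example values, not from the source): an
# Observation built with a 100-character name and a 200-character value
# serializes with "ObservationName" trimmed to 64 chars and "Value" to 128:
#   Observation("x" * 100, True, value="y" * 200).as_obj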
class HealthService(object):
ENDPOINT = 'http://{0}:80/HealthService'
API = 'reporttargethealth'
VERSION = "1.0"
OBSERVER_NAME = 'WALinuxAgent'
HOST_PLUGIN_HEARTBEAT_OBSERVATION_NAME = 'GuestAgentPluginHeartbeat'
HOST_PLUGIN_STATUS_OBSERVATION_NAME = 'GuestAgentPluginStatus'
HOST_PLUGIN_VERSIONS_OBSERVATION_NAME = 'GuestAgentPluginVersions'
HOST_PLUGIN_ARTIFACT_OBSERVATION_NAME = 'GuestAgentPluginArtifact'
IMDS_OBSERVATION_NAME = 'InstanceMetadataHeartbeat'
MAX_OBSERVATIONS = 10
def __init__(self, endpoint):
self.endpoint = HealthService.ENDPOINT.format(endpoint)
self.api = HealthService.API
self.version = HealthService.VERSION
self.source = HealthService.OBSERVER_NAME
self.observations = list()
@property
def as_json(self):
data = {
"Api": self.api,
"Version": self.version,
"Source": self.source,
"Observations": [o.as_obj for o in self.observations]
}
return json.dumps(data)
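# A representative payload produced by as_json (observation values illustrative):
#   {
#     "Api": "reporttargethealth",
#     "Version": "1.0",
#     "Source": "WALinuxAgent",
#     "Observations": [
#       {"ObservationName": "GuestAgentPluginHeartbeat", "IsHealthy": true,
#        "Description": "", "Value": ""}
#     ]
#   }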
def report_host_plugin_heartbeat(self, is_healthy):
"""
Reports a signal for /health
:param is_healthy: whether the call succeeded
"""
self._observe(name=HealthService.HOST_PLUGIN_HEARTBEAT_OBSERVATION_NAME,
is_healthy=is_healthy)
self._report()
def report_host_plugin_versions(self, is_healthy, response):
"""
Reports a signal for /versions
:param is_healthy: whether the api call succeeded
:param response: debugging information for failures
"""
self._observe(name=HealthService.HOST_PLUGIN_VERSIONS_OBSERVATION_NAME,
is_healthy=is_healthy,
value=response)
self._report()
def report_host_plugin_extension_artifact(self, is_healthy, source, response):
"""
Reports a signal for /extensionArtifact
:param is_healthy: whether the api call succeeded
:param source: specifies the api caller for debugging failures
:param response: debugging information for failures
"""
self._observe(name=HealthService.HOST_PLUGIN_ARTIFACT_OBSERVATION_NAME,
is_healthy=is_healthy,
description=source,
value=response)
self._report()
def report_host_plugin_status(self, is_healthy, response):
"""
Reports a signal for /status
:param is_healthy: whether the api call succeeded
:param response: debugging information for failures
"""
self._observe(name=HealthService.HOST_PLUGIN_STATUS_OBSERVATION_NAME,
is_healthy=is_healthy,
value=response)
self._report()
def report_imds_status(self, is_healthy, response):
"""
Reports a signal for /metadata/instance
:param is_healthy: whether the api call succeeded and returned valid data
:param response: debugging information for failures
"""
self._observe(name=HealthService.IMDS_OBSERVATION_NAME,
is_healthy=is_healthy,
value=response)
self._report()
def _observe(self, name, is_healthy, value='', description=''):
# ensure we keep the list size within bounds
if len(self.observations) >= HealthService.MAX_OBSERVATIONS:
del self.observations[:HealthService.MAX_OBSERVATIONS-1]
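# e.g. with MAX_OBSERVATIONS == 10 and a full list, the del above drops the
# nine oldest entries, so after the append below the list holds the most
# recent prior observation plus the new one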
self.observations.append(Observation(name=name,
is_healthy=is_healthy,
value=value,
description=description))
def _report(self):
logger.verbose('HealthService: report observations')
try:
restutil.http_post(self.endpoint, self.as_json, headers={'Content-Type': 'application/json'})
logger.verbose('HealthService: Reported observations to {0}: {1}', self.endpoint, self.as_json)
except HttpError as e:
logger.warn("HealthService: could not report observations: {0}", ustr(e))
finally:
# report any failures via telemetry
self._report_failures()
# these signals are not timestamped, so there is no value in persisting data
del self.observations[:]
def _report_failures(self):
try:
logger.verbose("HealthService: report failures as telemetry")
from azurelinuxagent.common.event import add_event, WALAEventOperation
for o in self.observations:
if not o.is_healthy:
add_event(AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.HealthObservation,
is_success=False,
message=json.dumps(o.as_obj))
except Exception as e:
logger.verbose("HealthService: could not report failures: {0}".format(ustr(e)))
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/hostplugin.py 0000664 0000000 0000000 00000075411 14626177470 0026664 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import base64
import datetime
import json
import os.path
import uuid
from azurelinuxagent.common import logger, conf
from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_HOST_PLUGIN_FAILURE
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.future import ustr, httpclient
from azurelinuxagent.common.protocol.healthservice import HealthService
from azurelinuxagent.common.protocol.extensions_goal_state import VmSettingsParseError, GoalStateSource
from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory
from azurelinuxagent.common.utils import restutil, textutil, timeutil
from azurelinuxagent.common.utils.textutil import remove_bom
from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, PY_VERSION_MAJOR
HOST_PLUGIN_PORT = 32526
URI_FORMAT_GET_API_VERSIONS = "http://{0}:{1}/versions"
URI_FORMAT_VM_SETTINGS = "http://{0}:{1}/vmSettings"
URI_FORMAT_GET_EXTENSION_ARTIFACT = "http://{0}:{1}/extensionArtifact"
URI_FORMAT_PUT_VM_STATUS = "http://{0}:{1}/status"
URI_FORMAT_PUT_LOG = "http://{0}:{1}/vmAgentLog"
URI_FORMAT_HEALTH = "http://{0}:{1}/health"
API_VERSION = "2015-09-01"
_HEADER_CLIENT_NAME = "x-ms-client-name"
_HEADER_CLIENT_VERSION = "x-ms-client-version"
_HEADER_CORRELATION_ID = "x-ms-client-correlationid"
_HEADER_CONTAINER_ID = "x-ms-containerid"
_HEADER_DEPLOYMENT_ID = "x-ms-vmagentlog-deploymentid"
_HEADER_VERSION = "x-ms-version"
_HEADER_HOST_CONFIG_NAME = "x-ms-host-config-name"
_HEADER_ARTIFACT_LOCATION = "x-ms-artifact-location"
_HEADER_ARTIFACT_MANIFEST_LOCATION = "x-ms-artifact-manifest-location"
_HEADER_VERIFY_FROM_ARTIFACTS_BLOB = "x-ms-verify-from-artifacts-blob"
MAXIMUM_PAGEBLOB_PAGE_SIZE = 4 * 1024 * 1024 # Max page size: 4MB
class HostPluginProtocol(object):
is_default_channel = False
FETCH_REPORTING_PERIOD = datetime.timedelta(minutes=1)
STATUS_REPORTING_PERIOD = datetime.timedelta(minutes=1)
def __init__(self, endpoint):
"""
NOTE: Before using the HostGAPlugin be sure to invoke GoalState.update_host_plugin_headers() to initialize
the container id and role config name
"""
if endpoint is None:
raise ProtocolError("HostGAPlugin: Endpoint not provided")
self.is_initialized = False
self.is_available = False
self.api_versions = None
self.endpoint = endpoint
self.container_id = None
self.deployment_id = None
self.role_config_name = None
self.manifest_uri = None
self.health_service = HealthService(endpoint)
self.fetch_error_state = ErrorState(min_timedelta=ERROR_STATE_HOST_PLUGIN_FAILURE)
self.status_error_state = ErrorState(min_timedelta=ERROR_STATE_HOST_PLUGIN_FAILURE)
self.fetch_last_timestamp = None
self.status_last_timestamp = None
self._version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown"
self._supports_vm_settings = None # Tri-state variable: None == Not Initialized, True == Supports, False == Does Not Support
self._supports_vm_settings_next_check = datetime.datetime.now()
self._vm_settings_error_reporter = _VmSettingsErrorReporter()
self._cached_vm_settings = None # Cached value of the most recent vmSettings
# restore the state of Fast Track
if not os.path.exists(self._get_fast_track_state_file()):
self._supports_vm_settings = False
self._supports_vm_settings_next_check = datetime.datetime.now()
self._fast_track_timestamp = timeutil.create_timestamp(datetime.datetime.min)
else:
self._supports_vm_settings = True
self._supports_vm_settings_next_check = datetime.datetime.now()
self._fast_track_timestamp = HostPluginProtocol.get_fast_track_timestamp()
@staticmethod
def _extract_deployment_id(role_config_name):
# Role config name consists of: .(...)
return role_config_name.split(".")[0] if role_config_name is not None else None
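# e.g. (hypothetical value): a role config name of
# "db00a775.0.db00a775.WorkerRole_IN_0.1.xml" yields the deployment id "db00a775"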
def check_vm_settings_support(self):
"""
Returns True if the HostGAPlugin supports the vmSettings API.
"""
# _supports_vm_settings is set by fetch_vm_settings()
if self._supports_vm_settings is None:
_, _ = self.fetch_vm_settings()
return self._supports_vm_settings
def update_container_id(self, new_container_id):
self.container_id = new_container_id
def update_role_config_name(self, new_role_config_name):
self.role_config_name = new_role_config_name
self.deployment_id = self._extract_deployment_id(new_role_config_name)
def update_manifest_uri(self, new_manifest_uri):
self.manifest_uri = new_manifest_uri
def ensure_initialized(self):
if not self.is_initialized:
self.api_versions = self.get_api_versions()
self.is_available = API_VERSION in self.api_versions
self.is_initialized = self.is_available
add_event(op=WALAEventOperation.InitializeHostPlugin,
is_success=self.is_available)
return self.is_available
def get_health(self):
"""
Call the /health endpoint
:return: True if 200 received, False otherwise
"""
url = URI_FORMAT_HEALTH.format(self.endpoint,
HOST_PLUGIN_PORT)
logger.verbose("HostGAPlugin: Getting health from [{0}]", url)
response = restutil.http_get(url, max_retry=1)
return restutil.request_succeeded(response)
def get_api_versions(self):
url = URI_FORMAT_GET_API_VERSIONS.format(self.endpoint,
HOST_PLUGIN_PORT)
logger.verbose("HostGAPlugin: Getting API versions at [{0}]"
.format(url))
return_val = []
error_response = ''
is_healthy = False
try:
headers = {_HEADER_CONTAINER_ID: self.container_id}
response = restutil.http_get(url, headers)
if restutil.request_failed(response):
error_response = restutil.read_response_error(response)
logger.error("HostGAPlugin: Failed Get API versions: {0}".format(error_response))
is_healthy = not restutil.request_failed_at_hostplugin(response)
else:
return_val = ustr(remove_bom(response.read()), encoding='utf-8')
is_healthy = True
except HttpError as e:
logger.error("HostGAPlugin: Exception Get API versions: {0}".format(e))
self.health_service.report_host_plugin_versions(is_healthy=is_healthy, response=error_response)
return return_val
def get_vm_settings_request(self, correlation_id):
url = URI_FORMAT_VM_SETTINGS.format(self.endpoint, HOST_PLUGIN_PORT)
headers = {
_HEADER_VERSION: API_VERSION,
_HEADER_CONTAINER_ID: self.container_id,
_HEADER_HOST_CONFIG_NAME: self.role_config_name,
_HEADER_CORRELATION_ID: correlation_id
}
return url, headers
def get_artifact_request(self, artifact_url, use_verify_header, artifact_manifest_url=None):
if not self.ensure_initialized():
raise ProtocolError("HostGAPlugin: Host plugin channel is not available")
if textutil.is_str_none_or_whitespace(artifact_url):
raise ProtocolError("HostGAPlugin: No extension artifact url was provided")
url = URI_FORMAT_GET_EXTENSION_ARTIFACT.format(self.endpoint, HOST_PLUGIN_PORT)
headers = {
_HEADER_VERSION: API_VERSION,
_HEADER_CONTAINER_ID: self.container_id,
_HEADER_HOST_CONFIG_NAME: self.role_config_name,
_HEADER_ARTIFACT_LOCATION: artifact_url}
if use_verify_header:
headers[_HEADER_VERIFY_FROM_ARTIFACTS_BLOB] = "true"
if artifact_manifest_url is not None:
headers[_HEADER_ARTIFACT_MANIFEST_LOCATION] = artifact_manifest_url
return url, headers
def report_fetch_health(self, uri, is_healthy=True, source='', response=''):
if uri != URI_FORMAT_GET_EXTENSION_ARTIFACT.format(self.endpoint, HOST_PLUGIN_PORT):
return
if self.should_report(is_healthy,
self.fetch_error_state,
self.fetch_last_timestamp,
HostPluginProtocol.FETCH_REPORTING_PERIOD):
self.fetch_last_timestamp = datetime.datetime.utcnow()
health_signal = self.fetch_error_state.is_triggered() is False
self.health_service.report_host_plugin_extension_artifact(is_healthy=health_signal,
source=source,
response=response)
def report_status_health(self, is_healthy, response=''):
if self.should_report(is_healthy,
self.status_error_state,
self.status_last_timestamp,
HostPluginProtocol.STATUS_REPORTING_PERIOD):
self.status_last_timestamp = datetime.datetime.utcnow()
health_signal = self.status_error_state.is_triggered() is False
self.health_service.report_host_plugin_status(is_healthy=health_signal,
response=response)
@staticmethod
def should_report(is_healthy, error_state, last_timestamp, period):
"""
Determine whether a health signal should be reported
:param is_healthy: whether the current measurement is healthy
:param error_state: the error state which is tracking time since failure
:param last_timestamp: the last measurement time stamp
:param period: the reporting period
:return: True if the signal should be reported, False otherwise
"""
if is_healthy:
# we only reset the error state upon success, since we want to keep
# reporting the failure; this is different from other uses of error states,
# which do not have a separate periodicity
error_state.reset()
else:
error_state.incr()
if last_timestamp is None:
last_timestamp = datetime.datetime.utcnow() - period
return datetime.datetime.utcnow() >= (last_timestamp + period)
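# e.g. with a one-minute reporting period: the first call always reports,
# because a None last_timestamp is back-dated by one period; later calls
# within the same minute return False even while the error state keeps
# accumulating failures, so at most one signal is sent per period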
def put_vm_log(self, content):
"""
Try to upload VM logs, a compressed zip file, via the host plugin /vmAgentLog channel.
:param content: the binary content of the zip file to upload
"""
if not self.ensure_initialized():
raise ProtocolError("HostGAPlugin: HostGAPlugin is not available")
if content is None:
raise ProtocolError("HostGAPlugin: Invalid argument passed to upload VM logs. Content was not provided.")
url = URI_FORMAT_PUT_LOG.format(self.endpoint, HOST_PLUGIN_PORT)
response = restutil.http_put(url,
data=content,
headers=self._build_log_headers(),
redact_data=True,
timeout=30)
if restutil.request_failed(response):
error_response = restutil.read_response_error(response)
raise HttpError("HostGAPlugin: Upload VM logs failed: {0}".format(error_response))
return response
def put_vm_status(self, status_blob, sas_url, config_blob_type=None):
"""
Try to upload the VM status via the host plugin /status channel
:param sas_url: the blob SAS url to pass to the host plugin
:param config_blob_type: the blob type from the extension config
:type status_blob: StatusBlob
"""
if not self.ensure_initialized():
raise ProtocolError("HostGAPlugin: HostGAPlugin is not available")
if status_blob is None or status_blob.vm_status is None:
raise ProtocolError("HostGAPlugin: Status blob was not provided")
logger.verbose("HostGAPlugin: Posting VM status")
blob_type = status_blob.type if status_blob.type else config_blob_type
if blob_type == "BlockBlob":
self._put_block_blob_status(sas_url, status_blob)
else:
self._put_page_blob_status(sas_url, status_blob)
def _put_block_blob_status(self, sas_url, status_blob):
url = URI_FORMAT_PUT_VM_STATUS.format(self.endpoint, HOST_PLUGIN_PORT)
response = restutil.http_put(url,
data=self._build_status_data(
sas_url,
status_blob.get_block_blob_headers(len(status_blob.data)),
bytearray(status_blob.data, encoding='utf-8')),
headers=self._build_status_headers())
if restutil.request_failed(response):
error_response = restutil.read_response_error(response)
is_healthy = not restutil.request_failed_at_hostplugin(response)
self.report_status_health(is_healthy=is_healthy, response=error_response)
raise HttpError("HostGAPlugin: Put BlockBlob failed: {0}"
.format(error_response))
else:
self.report_status_health(is_healthy=True)
logger.verbose("HostGAPlugin: Put BlockBlob status succeeded")
def _put_page_blob_status(self, sas_url, status_blob):
url = URI_FORMAT_PUT_VM_STATUS.format(self.endpoint, HOST_PLUGIN_PORT)
# Convert the status into a blank-padded string whose length is a multiple of 512
status = bytearray(status_blob.data, encoding='utf-8')
status_size = int((len(status) + 511) / 512) * 512
status = bytearray(status_blob.data.ljust(status_size), encoding='utf-8')
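# e.g. a 700-byte status pads to status_size == 1024, since
# int((700 + 511) / 512) * 512 == 1024; ljust fills the extra 324 bytes with blanks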
# First, initialize an empty blob
response = restutil.http_put(url,
data=self._build_status_data(
sas_url,
status_blob.get_page_blob_create_headers(status_size)),
headers=self._build_status_headers())
if restutil.request_failed(response):
error_response = restutil.read_response_error(response)
is_healthy = not restutil.request_failed_at_hostplugin(response)
self.report_status_health(is_healthy=is_healthy, response=error_response)
raise HttpError("HostGAPlugin: Failed PageBlob clean-up: {0}"
.format(error_response))
else:
self.report_status_health(is_healthy=True)
logger.verbose("HostGAPlugin: PageBlob clean-up succeeded")
# Then, upload the blob in pages
if sas_url.count("?") <= 0:
sas_url = "{0}?comp=page".format(sas_url)
else:
sas_url = "{0}&comp=page".format(sas_url)
start = 0
end = 0
while start < len(status):
# Create the next page
end = start + min(len(status) - start, MAXIMUM_PAGEBLOB_PAGE_SIZE)
page_size = int((end - start + 511) / 512) * 512
buf = bytearray(page_size)
buf[0: end - start] = status[start: end]
# Send the page
response = restutil.http_put(url,
data=self._build_status_data(
sas_url,
status_blob.get_page_blob_page_headers(start, end),
buf),
headers=self._build_status_headers())
if restutil.request_failed(response):
error_response = restutil.read_response_error(response)
is_healthy = not restutil.request_failed_at_hostplugin(response)
self.report_status_health(is_healthy=is_healthy, response=error_response)
raise HttpError(
"HostGAPlugin Error: Put PageBlob bytes "
"[{0},{1}]: {2}".format(start, end, error_response))
# Advance to the next page (if any)
start = end
def _build_status_data(self, sas_url, blob_headers, content=None):
headers = []
for name in iter(blob_headers.keys()):
headers.append({
'headerName': name,
'headerValue': blob_headers[name]
})
data = {
'requestUri': sas_url,
'headers': headers
}
if content is not None:
data['content'] = self._base64_encode(content)
return json.dumps(data, sort_keys=True)
def _build_status_headers(self):
return {
_HEADER_VERSION: API_VERSION,
"Content-type": "application/json",
_HEADER_CONTAINER_ID: self.container_id,
_HEADER_HOST_CONFIG_NAME: self.role_config_name
}
def _build_log_headers(self):
return {
_HEADER_VERSION: API_VERSION,
_HEADER_CONTAINER_ID: self.container_id,
_HEADER_DEPLOYMENT_ID: self.deployment_id,
_HEADER_CLIENT_NAME: AGENT_NAME,
_HEADER_CLIENT_VERSION: AGENT_VERSION,
_HEADER_CORRELATION_ID: str(uuid.uuid4())
}
def _base64_encode(self, data):
s = base64.b64encode(bytes(data))
if PY_VERSION_MAJOR > 2:
return s.decode('utf-8')
return s
@staticmethod
def _get_fast_track_state_file():
# This file keeps the timestamp of the most recent goal state if it was retrieved via Fast Track
return os.path.join(conf.get_lib_dir(), "fast_track.json")
@staticmethod
def _save_fast_track_state(timestamp):
try:
with open(HostPluginProtocol._get_fast_track_state_file(), "w") as file_:
json.dump({"timestamp": timestamp}, file_)
except Exception as e:
logger.warn("Error updating the Fast Track state ({0}): {1}", HostPluginProtocol._get_fast_track_state_file(), ustr(e))
@staticmethod
def clear_fast_track_state():
try:
if os.path.exists(HostPluginProtocol._get_fast_track_state_file()):
os.remove(HostPluginProtocol._get_fast_track_state_file())
except Exception as e:
logger.warn("Error clearing the current state for Fast Track ({0}): {1}", HostPluginProtocol._get_fast_track_state_file(),
ustr(e))
@staticmethod
def get_fast_track_timestamp():
"""
Returns the timestamp of the most recent FastTrack goal state retrieved by fetch_vm_settings(), or a timestamp of
datetime.min if the most recent goal state was Fabric or fetch_vm_settings() has not been invoked.
"""
if not os.path.exists(HostPluginProtocol._get_fast_track_state_file()):
return timeutil.create_timestamp(datetime.datetime.min)
try:
with open(HostPluginProtocol._get_fast_track_state_file(), "r") as file_:
return json.load(file_)["timestamp"]
except Exception as e:
logger.warn("Can't retrieve the timestamp for the most recent Fast Track goal state ({0}), will assume the current time. Error: {1}",
HostPluginProtocol._get_fast_track_state_file(), ustr(e))
return timeutil.create_timestamp(datetime.datetime.utcnow())
def fetch_vm_settings(self, force_update=False):
"""
Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalState, bool) tuple with the vmSettings and
a boolean indicating whether they are an updated (True) or a cached (False) value.
Raises
* VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API
* VmSettingsSupportStopped if the HostGAPlugin stopped supporting the vmSettings API
* VmSettingsParseError if the HostGAPlugin returned invalid vmSettings (e.g. syntax error)
* ResourceGoneError if the container ID and roleconfig name need to be refreshed
* ProtocolError if the request fails for any other reason (e.g. not supported, time out, server error)
"""
def raise_not_supported():
try:
if self._supports_vm_settings:
# The most recent goal state was delivered using FastTrack, and suddenly the HostGAPlugin does not support the vmSettings API anymore.
# This can happen if, for example, the VM is migrated across host nodes that are running different versions of the HostGAPlugin.
logger.warn("The HostGAPlugin stopped supporting the vmSettings API. If there is a pending FastTrack goal state, it will not be executed.")
add_event(op=WALAEventOperation.VmSettings, message="[VmSettingsSupportStopped] HostGAPlugin: {0}".format(self._version), is_success=False, log_event=False)
raise VmSettingsSupportStopped(self._fast_track_timestamp)
else:
logger.info("HostGAPlugin {0} does not support the vmSettings API. Will not use FastTrack.", self._version)
add_event(op=WALAEventOperation.VmSettings, message="[VmSettingsNotSupported] HostGAPlugin: {0}".format(self._version), is_success=True)
raise VmSettingsNotSupported()
finally:
self._supports_vm_settings = False
self._supports_vm_settings_next_check = datetime.datetime.now() + datetime.timedelta(hours=6) # check again in 6 hours
def format_message(msg):
return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg)
try:
# Raise if VmSettings are not supported, but check again periodically since the HostGAPlugin could have been updated since the last check
# Note that self._supports_vm_settings can be None, so we need to compare against False explicitly
if self._supports_vm_settings is False and self._supports_vm_settings_next_check > datetime.datetime.now():
# Raise VmSettingsNotSupported directly instead of using raise_not_supported() to avoid resetting the timestamp for the next check
raise VmSettingsNotSupported()
etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag
correlation_id = str(uuid.uuid4())
self._vm_settings_error_reporter.report_request()
url, headers = self.get_vm_settings_request(correlation_id)
if etag is not None:
headers['if-none-match'] = etag
response = restutil.http_get(url, headers=headers, use_proxy=False, max_retry=1, return_raw_response=True)
if response.status == httpclient.GONE:
raise ResourceGoneError()
if response.status == httpclient.NOT_FOUND: # the HostGAPlugin does not support FastTrack
raise_not_supported()
if response.status == httpclient.NOT_MODIFIED: # The goal state hasn't changed, return the current instance
return self._cached_vm_settings, False
if response.status != httpclient.OK:
error_description = restutil.read_response_error(response)
# For historical reasons the HostGAPlugin returns 502 (BAD_GATEWAY) for internal errors instead of using
# 500 (INTERNAL_SERVER_ERROR). We add a short prefix to the error message in the hope that it will help
# clear any confusion produced by the poor choice of status code.
if response.status == httpclient.BAD_GATEWAY:
error_description = "[Internal error in HostGAPlugin] {0}".format(error_description)
error_description = format_message(error_description)
if 400 <= response.status <= 499:
self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ClientError)
elif 500 <= response.status <= 599:
self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ServerError)
else:
self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.HttpError)
raise ProtocolError(error_description)
for h in response.getheaders():
if h[0].lower() == 'etag':
response_etag = h[1]
break
else: # since the vmSettings were updated, the response must include an etag
message = format_message("The vmSettings response does not include an Etag header")
raise ProtocolError(message)
response_content = ustr(response.read(), encoding='utf-8')
vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content, correlation_id)
# log the HostGAPlugin version
if vm_settings.host_ga_plugin_version != self._version:
self._version = vm_settings.host_ga_plugin_version
message = "HostGAPlugin version: {0}".format(vm_settings.host_ga_plugin_version)
logger.info(message)
add_event(op=WALAEventOperation.HostPlugin, message=message, is_success=True)
# Don't support HostGAPlugin versions older than 133
if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.133"):
raise_not_supported()
self._supports_vm_settings = True
self._cached_vm_settings = vm_settings
if vm_settings.source == GoalStateSource.FastTrack:
self._fast_track_timestamp = vm_settings.created_on_timestamp
self._save_fast_track_state(vm_settings.created_on_timestamp)
else:
self.clear_fast_track_state()
return vm_settings, True
except (ProtocolError, ResourceGoneError, VmSettingsNotSupported, VmSettingsParseError):
raise
except Exception as exception:
if isinstance(exception, IOError) and "timed out" in ustr(exception):
message = format_message("Timeout")
self._vm_settings_error_reporter.report_error(message, _VmSettingsError.Timeout)
else:
message = format_message("Request failed: {0}".format(textutil.format_exception(exception)))
self._vm_settings_error_reporter.report_error(message, _VmSettingsError.RequestFailed)
raise ProtocolError(message)
finally:
self._vm_settings_error_reporter.report_summary()
class VmSettingsNotSupported(TypeError):
"""
Indicates that the HostGAPlugin does not support the vmSettings API
"""
class VmSettingsSupportStopped(VmSettingsNotSupported):
"""
Indicates that the HostGAPlugin supported the vmSettings API in previous calls, but no longer supports it for the current call.
This can happen, for example, if the VM is migrated across nodes with different HostGAPlugin versions.
"""
def __init__(self, timestamp):
super(VmSettingsSupportStopped, self).__init__()
self.timestamp = timestamp
class _VmSettingsError(object):
ClientError = 'ClientError'
HttpError = 'HttpError'
RequestFailed = 'RequestFailed'
ServerError = 'ServerError'
Timeout = 'Timeout'
class _VmSettingsErrorReporter(object):
_MaxErrors = 3 # Max number of errors reported to telemetry (by period)
_Period = datetime.timedelta(hours=1) # How often to report the summary
def __init__(self):
self._reset()
def _reset(self):
self._request_count = 0 # Total number of vmSettings HTTP requests
self._error_count = 0 # Total number of errors issuing vmSettings requests (includes all kinds of errors)
self._client_error_count = 0 # Count of client side errors (HTTP status in the 400s)
self._http_error_count = 0 # Count of HTTP errors other than 400s and 500s
self._request_failure_count = 0 # Total count of requests that could not be issued (does not include timeouts or requests that were actually issued and failed, for example, with 500 or 400 statuses)
self._server_error_count = 0 # Count of server side errors (HTTP status in the 500s)
self._timeout_count = 0 # Count of timeouts on vmSettings requests
self._next_period = datetime.datetime.now() + _VmSettingsErrorReporter._Period
def report_request(self):
self._request_count += 1
def report_error(self, error, category):
self._error_count += 1
if self._error_count <= _VmSettingsErrorReporter._MaxErrors:
add_event(op=WALAEventOperation.VmSettings, message="[{0}] {1}".format(category, error), is_success=True, log_event=False)
if category == _VmSettingsError.ClientError:
self._client_error_count += 1
elif category == _VmSettingsError.HttpError:
self._http_error_count += 1
elif category == _VmSettingsError.RequestFailed:
self._request_failure_count += 1
elif category == _VmSettingsError.ServerError:
self._server_error_count += 1
elif category == _VmSettingsError.Timeout:
self._timeout_count += 1
def report_summary(self):
if datetime.datetime.now() >= self._next_period:
summary = {
"requests": self._request_count,
"errors": self._error_count,
"serverErrors": self._server_error_count,
"clientErrors": self._client_error_count,
"timeouts": self._timeout_count,
"failedRequests": self._request_failure_count
}
message = json.dumps(summary)
add_event(op=WALAEventOperation.VmSettingsSummary, message=message, is_success=True, log_event=False)
if self._error_count > 0:
logger.info("[VmSettingsSummary] {0}", message)
self._reset()
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/imds.py 0000664 0000000 0000000 00000033670 14626177470 0025425 0 ustar 00root root 0000000 0000000 # Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
import json
import re
from collections import namedtuple
import azurelinuxagent.common.utils.restutil as restutil
from azurelinuxagent.common.exception import HttpError, ResourceGoneError
from azurelinuxagent.common.future import ustr
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.datacontract import DataContract, set_properties
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
IMDS_ENDPOINT = '169.254.169.254'
APIVERSION = '2018-02-01'
BASE_METADATA_URI = "http://{0}/metadata/{1}?api-version={2}"
IMDS_IMAGE_ORIGIN_UNKNOWN = 0
IMDS_IMAGE_ORIGIN_CUSTOM = 1
IMDS_IMAGE_ORIGIN_ENDORSED = 2
IMDS_IMAGE_ORIGIN_PLATFORM = 3
MetadataResult = namedtuple('MetadataResult', ['success', 'service_error', 'response'])
IMDS_RESPONSE_SUCCESS = 0
IMDS_RESPONSE_ERROR = 1
IMDS_CONNECTION_ERROR = 2
IMDS_INTERNAL_SERVER_ERROR = 3
def get_imds_client(wireserver_endpoint):
return ImdsClient(wireserver_endpoint)
# A *slightly* future proof list of endorsed distros.
# -> e.g. I have predicted the future and said that 20.04-LTS will exist
# and is endorsed.
#
# See https://docs.microsoft.com/en-us/azure/virtual-machines/linux/endorsed-distros for
# more details.
#
# This is not an exhaustive list. This is a best attempt to mark images as
# endorsed or not. Image publishers do not encode all of the requisite information
# in their publisher, offer, sku, and version to definitively mark something as
# endorsed or not. This is not perfect, but it is approximately 98% perfect.
ENDORSED_IMAGE_INFO_MATCHER_JSON = """{
"CANONICAL": {
"UBUNTUSERVER": {
"List": [
"14.04.0-LTS",
"14.04.1-LTS",
"14.04.2-LTS",
"14.04.3-LTS",
"14.04.4-LTS",
"14.04.5-LTS",
"14.04.6-LTS",
"14.04.7-LTS",
"14.04.8-LTS",
"16.04-LTS",
"16.04.0-LTS",
"18.04-LTS",
"20.04-LTS",
"22.04-LTS"
]
}
},
"COREOS": {
"COREOS": {
"STABLE": { "Minimum": "494.4.0" }
}
},
"CREDATIV": {
"DEBIAN": { "Minimum": "7" }
},
"OPENLOGIC": {
"CENTOS": {
"Minimum": "6.3",
"List": [
"7-LVM",
"7-RAW"
]
},
"CENTOS-HPC": { "Minimum": "6.3" }
},
"REDHAT": {
"RHEL": {
"Minimum": "6.7",
"List": [
"7-LVM",
"7-RAW"
]
},
"RHEL-HANA": { "Minimum": "6.7" },
"RHEL-SAP": { "Minimum": "6.7" },
"RHEL-SAP-APPS": { "Minimum": "6.7" },
"RHEL-SAP-HANA": { "Minimum": "6.7" }
},
"SUSE": {
"SLES": {
"List": [
"11-SP4",
"11-SP5",
"11-SP6",
"12-SP1",
"12-SP2",
"12-SP3",
"12-SP4",
"12-SP5",
"12-SP6"
]
},
"SLES-BYOS": {
"List": [
"11-SP4",
"12",
"12-SP1",
"12-SP2",
"12-SP3",
"12-SP4",
"12-SP5",
"15",
"15-SP1",
"15-SP2",
"15-SP3",
"15-SP4",
"15-SP5"
]
},
"SLES-SAP": {
"List": [
"11-SP4",
"12",
"12-SP1",
"12-SP2",
"12-SP3",
"12-SP4",
"12-SP5",
"15",
"15-SP1",
"15-SP2",
"15-SP3",
"15-SP4",
"15-SP5"
]
},
"SLE-HPC": {
"List": [
"15-SP1",
"15-SP2",
"15-SP3",
"15-SP4",
"15-SP5"
]
}
}
}"""
class ImageInfoMatcher(object):
def __init__(self, doc):
self.doc = json.loads(doc)
def is_match(self, publisher, offer, sku, version):
def _is_match_walk(doci, keys):
key = keys.pop(0).upper()
if key is None:
return False
if key not in doci:
return False
if 'List' in doci[key] and keys[0] in doci[key]['List']:
return True
if 'Match' in doci[key] and re.match(doci[key]['Match'], keys[0]):
return True
if 'Minimum' in doci[key]:
try:
return FlexibleVersion(keys[0]) >= FlexibleVersion(doci[key]['Minimum'])
except ValueError:
pass
return _is_match_walk(doci[key], keys)
return _is_match_walk(self.doc, [ publisher, offer, sku, version ])
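# Illustrative usage (version value hypothetical): keys are upper-cased as the
# walk descends, so
#   matcher = ImageInfoMatcher(ENDORSED_IMAGE_INFO_MATCHER_JSON)
#   matcher.is_match("Canonical", "UbuntuServer", "18.04-LTS", "18.04.202005220")
# returns True via the "List" entry for that sku, while e.g. "OpenLogic"/"CentOS"
# images rely on the "Minimum" version comparison instead.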
class ComputeInfo(DataContract):
__matcher = ImageInfoMatcher(ENDORSED_IMAGE_INFO_MATCHER_JSON)
def __init__(self,
location=None,
name=None,
offer=None,
osType=None,
placementGroupId=None,
platformFaultDomain=None,
placementUpdateDomain=None,
publisher=None,
resourceGroupName=None,
sku=None,
subscriptionId=None,
tags=None,
version=None,
vmId=None,
vmSize=None,
vmScaleSetName=None,
zone=None):
self.location = location
self.name = name
self.offer = offer
self.osType = osType
self.placementGroupId = placementGroupId
self.platformFaultDomain = platformFaultDomain
self.platformUpdateDomain = placementUpdateDomain
self.publisher = publisher
self.resourceGroupName = resourceGroupName
self.sku = sku
self.subscriptionId = subscriptionId
self.tags = tags
self.version = version
self.vmId = vmId
self.vmSize = vmSize
self.vmScaleSetName = vmScaleSetName
self.zone = zone
@property
def image_info(self):
return "{0}:{1}:{2}:{3}".format(self.publisher, self.offer, self.sku, self.version)
@property
def image_origin(self):
"""
An integer value describing the origin of the image.
0 -> unknown
1 -> custom - user created image
2 -> endorsed - See https://docs.microsoft.com/en-us/azure/virtual-machines/linux/endorsed-distros
3 -> platform - non-endorsed image that is available in the Azure Marketplace.
"""
try:
if self.publisher == "":
return IMDS_IMAGE_ORIGIN_CUSTOM
if ComputeInfo.__matcher.is_match(self.publisher, self.offer, self.sku, self.version):
return IMDS_IMAGE_ORIGIN_ENDORSED
else:
return IMDS_IMAGE_ORIGIN_PLATFORM
except Exception as e:
logger.periodic_warn(logger.EVERY_FIFTEEN_MINUTES,
"[PERIODIC] Could not determine the image origin from IMDS: {0}".format(ustr(e)))
return IMDS_IMAGE_ORIGIN_UNKNOWN
class ImdsClient(object):
def __init__(self, wireserver_endpoint, version=APIVERSION):
self._api_version = version
self._headers = {
'User-Agent': restutil.HTTP_USER_AGENT,
'Metadata': True,
}
self._health_headers = {
'User-Agent': restutil.HTTP_USER_AGENT_HEALTH,
'Metadata': True,
}
self._regex_ioerror = re.compile(r".*HTTP Failed. GET http://[^ ]+ -- IOError .*")
self._regex_throttled = re.compile(r".*HTTP Retry. GET http://[^ ]+ -- Status Code 429 .*")
self._wireserver_endpoint = wireserver_endpoint
def _get_metadata_url(self, endpoint, resource_path):
return BASE_METADATA_URI.format(endpoint, resource_path, self._api_version)
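# e.g. _get_metadata_url(IMDS_ENDPOINT, 'instance/compute') yields
# "http://169.254.169.254/metadata/instance/compute?api-version=2018-02-01"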
def _http_get(self, endpoint, resource_path, headers):
url = self._get_metadata_url(endpoint, resource_path)
return restutil.http_get(url, headers=headers, use_proxy=False)
def _get_metadata_from_endpoint(self, endpoint, resource_path, headers):
"""
Get metadata from one of the IMDS endpoints.
:param str endpoint: IMDS endpoint to call
:param str resource_path: path of IMDS resource
:param dict headers: headers to send in the request
:return: Tuple
status: one of the following response status codes: IMDS_RESPONSE_SUCCESS, IMDS_RESPONSE_ERROR,
IMDS_CONNECTION_ERROR, IMDS_INTERNAL_SERVER_ERROR
response: IMDS response on IMDS_RESPONSE_SUCCESS, failure message otherwise
"""
try:
resp = self._http_get(endpoint=endpoint, resource_path=resource_path, headers=headers)
except ResourceGoneError:
return IMDS_INTERNAL_SERVER_ERROR, "IMDS error in /metadata/{0}: HTTP Failed with Status Code 410: Gone".format(resource_path)
except HttpError as e:
msg = str(e)
if self._regex_throttled.match(msg):
return IMDS_RESPONSE_ERROR, "IMDS error in /metadata/{0}: Throttled".format(resource_path)
if self._regex_ioerror.match(msg):
logger.periodic_warn(logger.EVERY_FIFTEEN_MINUTES,
"[PERIODIC] [IMDS_CONNECTION_ERROR] Unable to connect to IMDS endpoint {0}".format(endpoint))
return IMDS_CONNECTION_ERROR, "IMDS error in /metadata/{0}: Unable to connect to endpoint".format(resource_path)
return IMDS_INTERNAL_SERVER_ERROR, "IMDS error in /metadata/{0}: {1}".format(resource_path, msg)
if resp.status >= 500:
return IMDS_INTERNAL_SERVER_ERROR, "IMDS error in /metadata/{0}: {1}".format(
resource_path, restutil.read_response_error(resp))
if restutil.request_failed(resp):
return IMDS_RESPONSE_ERROR, "IMDS error in /metadata/{0}: {1}".format(
resource_path, restutil.read_response_error(resp))
return IMDS_RESPONSE_SUCCESS, resp.read()
def get_metadata(self, resource_path, is_health):
"""
Get metadata from IMDS, falling back to Wireserver endpoint if necessary.
:param str resource_path: path of IMDS resource
:param bool is_health: True if for health/heartbeat, False otherwise
:return: instance of MetadataResult
:rtype: MetadataResult
"""
headers = self._health_headers if is_health else self._headers
endpoint = IMDS_ENDPOINT
status, resp = self._get_metadata_from_endpoint(endpoint, resource_path, headers)
if status == IMDS_CONNECTION_ERROR:
endpoint = self._wireserver_endpoint
status, resp = self._get_metadata_from_endpoint(endpoint, resource_path, headers)
if status == IMDS_RESPONSE_SUCCESS:
return MetadataResult(True, False, resp)
elif status == IMDS_INTERNAL_SERVER_ERROR:
return MetadataResult(False, True, resp)
return MetadataResult(False, False, resp)
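# Fallback behavior of the method above: only an IMDS_CONNECTION_ERROR on the
# direct IMDS call triggers a retry of the same resource path through the
# wireserver endpoint passed to the constructor; any other failure status is
# mapped into the MetadataResult without a retry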
def get_compute(self):
"""
Fetch compute information.
:return: instance of a ComputeInfo
:rtype: ComputeInfo
"""
# ensure we get a 200
result = self.get_metadata('instance/compute', is_health=False)
if not result.success:
raise HttpError(result.response)
data = json.loads(ustr(result.response, encoding="utf-8"))
compute_info = ComputeInfo()
set_properties('compute', compute_info, data)
return compute_info
def validate(self):
"""
Determines whether the metadata instance API returns 200 and the response
is valid: compute should contain location, name, subscription id, and vm size;
network should contain mac address and private ip address.
:return: Tuple
is_healthy: True on a successful response or a connection failure; False
when the service itself returns an error.
error_response: validation failure details to assist with debugging
"""
# ensure we get a 200
result = self.get_metadata('instance', is_health=True)
if not result.success:
# we should only return False when the service is unhealthy
return (not result.service_error), result.response
# ensure the response is valid json
try:
json_data = json.loads(ustr(result.response, encoding="utf-8"))
except Exception as e:
return False, "JSON parsing failed: {0}".format(ustr(e))
# ensure all expected fields are present and have a value
try:
# TODO: compute fields cannot be verified yet since we need to exclude rdfe vms (#1249)
self.check_field(json_data, 'network')
self.check_field(json_data['network'], 'interface')
self.check_field(json_data['network']['interface'][0], 'macAddress')
self.check_field(json_data['network']['interface'][0], 'ipv4')
self.check_field(json_data['network']['interface'][0]['ipv4'], 'ipAddress')
self.check_field(json_data['network']['interface'][0]['ipv4']['ipAddress'][0], 'privateIpAddress')
except ValueError as v:
return False, ustr(v)
return True, ''
@staticmethod
def check_field(dict_obj, field):
if field not in dict_obj or dict_obj[field] is None:
raise ValueError('Missing field: [{0}]'.format(field))
if len(dict_obj[field]) == 0:
raise ValueError('Empty field: [{0}]'.format(field))
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/metadata_server_migration_util.py 0000664 0000000 0000000 00000006013 14626177470 0032734 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.utils.restutil import KNOWN_WIRESERVER_IP
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
# Name for Metadata Server Protocol
_METADATA_PROTOCOL_NAME = "MetadataProtocol"
# MetadataServer Certificates for Cleanup
_LEGACY_METADATA_SERVER_TRANSPORT_PRV_FILE_NAME = "V2TransportPrivate.pem"
_LEGACY_METADATA_SERVER_TRANSPORT_CERT_FILE_NAME = "V2TransportCert.pem"
_LEGACY_METADATA_SERVER_P7B_FILE_NAME = "Certificates.p7b"
# MetadataServer Endpoint
_KNOWN_METADATASERVER_IP = "169.254.169.254"
def is_metadata_server_artifact_present():
metadata_artifact_path = os.path.join(conf.get_lib_dir(), _LEGACY_METADATA_SERVER_TRANSPORT_CERT_FILE_NAME)
return os.path.isfile(metadata_artifact_path)
def cleanup_metadata_server_artifacts(osutil):
logger.info("Clean up for MetadataServer to WireServer protocol migration: removing MetadataServer certificates and resetting firewall rules.")
_cleanup_metadata_protocol_certificates()
_reset_firewall_rules(osutil)
def _cleanup_metadata_protocol_certificates():
"""
Removes MetadataServer Certificates.
"""
lib_directory = conf.get_lib_dir()
_ensure_file_removed(lib_directory, _LEGACY_METADATA_SERVER_TRANSPORT_PRV_FILE_NAME)
_ensure_file_removed(lib_directory, _LEGACY_METADATA_SERVER_TRANSPORT_CERT_FILE_NAME)
_ensure_file_removed(lib_directory, _LEGACY_METADATA_SERVER_P7B_FILE_NAME)
def _reset_firewall_rules(osutil):
"""
Removes MetadataServer firewall rule so IMDS can be used. Enables
WireServer firewall rule based on if firewall is configured to be on.
"""
osutil.remove_firewall(dst_ip=_KNOWN_METADATASERVER_IP, uid=os.getuid(), wait=osutil.get_firewall_will_wait())
if conf.enable_firewall():
success, _ = osutil.enable_firewall(dst_ip=KNOWN_WIRESERVER_IP, uid=os.getuid())
add_event(
AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.Firewall,
is_success=success,
log_event=False)
def _ensure_file_removed(directory, file_name):
"""
Removes files if they are present.
"""
path = os.path.join(directory, file_name)
if os.path.isfile(path):
os.remove(path)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/ovfenv.py 0000664 0000000 0000000 00000011660 14626177470 0025767 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
"""
Copy and parse ovf-env.xml from provisioning ISO and local cache
"""
import os # pylint: disable=W0611
import re # pylint: disable=W0611
import shutil # pylint: disable=W0611
import xml.dom.minidom as minidom # pylint: disable=W0611
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.exception import ProtocolError
from azurelinuxagent.common.future import ustr # pylint: disable=W0611
import azurelinuxagent.common.utils.fileutil as fileutil # pylint: disable=W0611
from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext
OVF_VERSION = "1.0"
OVF_NAME_SPACE = "http://schemas.dmtf.org/ovf/environment/1"
WA_NAME_SPACE = "http://schemas.microsoft.com/windowsazure"
def _validate_ovf(val, msg):
if val is None:
raise ProtocolError("Failed to validate OVF: {0}".format(msg))
class OvfEnv(object):
"""
Read, and process provisioning info from provisioning file OvfEnv.xml
"""
def __init__(self, xml_text):
if xml_text is None:
raise ValueError("ovf-env is None")
logger.verbose("Load ovf-env.xml")
self.hostname = None
self.username = None
self.user_password = None
self.customdata = None
self.disable_ssh_password_auth = True
self.ssh_pubkeys = []
self.ssh_keypairs = []
self.provision_guest_agent = None
self.parse(xml_text)
def parse(self, xml_text):
"""
Parse the xml tree, retrieving user and ssh key information.
Return self.
"""
wans = WA_NAME_SPACE
ovfns = OVF_NAME_SPACE
xml_doc = parse_doc(xml_text)
environment = find(xml_doc, "Environment", namespace=ovfns)
_validate_ovf(environment, "Environment not found")
section = find(environment, "ProvisioningSection", namespace=wans)
_validate_ovf(section, "ProvisioningSection not found")
version = findtext(environment, "Version", namespace=wans)
_validate_ovf(version, "Version not found")
if version > OVF_VERSION:
logger.warn("Newer provisioning configuration detected. "
"Please consider updating waagent")
conf_set = find(section, "LinuxProvisioningConfigurationSet",
namespace=wans)
_validate_ovf(conf_set, "LinuxProvisioningConfigurationSet not found")
self.hostname = findtext(conf_set, "HostName", namespace=wans)
_validate_ovf(self.hostname, "HostName not found")
self.username = findtext(conf_set, "UserName", namespace=wans)
_validate_ovf(self.username, "UserName not found")
self.user_password = findtext(conf_set, "UserPassword", namespace=wans)
self.customdata = findtext(conf_set, "CustomData", namespace=wans)
auth_option = findtext(conf_set, "DisableSshPasswordAuthentication",
namespace=wans)
if auth_option is not None and auth_option.lower() == "true":
self.disable_ssh_password_auth = True
else:
self.disable_ssh_password_auth = False
public_keys = findall(conf_set, "PublicKey", namespace=wans)
for public_key in public_keys:
path = findtext(public_key, "Path", namespace=wans)
fingerprint = findtext(public_key, "Fingerprint", namespace=wans)
value = findtext(public_key, "Value", namespace=wans)
self.ssh_pubkeys.append((path, fingerprint, value))
keypairs = findall(conf_set, "KeyPair", namespace=wans)
for keypair in keypairs:
path = findtext(keypair, "Path", namespace=wans)
fingerprint = findtext(keypair, "Fingerprint", namespace=wans)
self.ssh_keypairs.append((path, fingerprint))
platform_settings_section = find(environment, "PlatformSettingsSection", namespace=wans)
_validate_ovf(platform_settings_section, "PlatformSettingsSection not found")
platform_settings = find(platform_settings_section, "PlatformSettings", namespace=wans)
_validate_ovf(platform_settings, "PlatformSettings not found")
self.provision_guest_agent = findtext(platform_settings, "ProvisionGuestAgent", namespace=wans)
_validate_ovf(self.provision_guest_agent, "ProvisionGuestAgent not found")
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/restapi.py 0000664 0000000 0000000 00000026252 14626177470 0026136 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import socket
import time
from azurelinuxagent.common.datacontract import DataContract, DataContractList
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.utils.textutil import getattrib
from azurelinuxagent.common.version import DISTRO_VERSION, DISTRO_NAME, CURRENT_VERSION
VERSION_0 = "0.0.0.0"
class VMInfo(DataContract):
def __init__(self,
subscriptionId=None,
vmName=None,
roleName=None,
roleInstanceName=None,
tenantName=None):
self.subscriptionId = subscriptionId
self.vmName = vmName
self.roleName = roleName
self.roleInstanceName = roleInstanceName
self.tenantName = tenantName
class CertificateData(DataContract):
def __init__(self, certificateData=None):
self.certificateData = certificateData
class Cert(DataContract):
def __init__(self,
name=None,
thumbprint=None,
certificateDataUri=None,
storeName=None,
storeLocation=None):
self.name = name
self.thumbprint = thumbprint
self.certificateDataUri = certificateDataUri
self.storeLocation = storeLocation
self.storeName = storeName
class CertList(DataContract):
def __init__(self):
self.certificates = DataContractList(Cert)
class VMAgentFamily(object):
def __init__(self, name):
self.name = name
# Two-state: None, string. Set to None if version not specified in the GS
self.version = None
# Tri-state: None, True, False. Set to None if this property not specified in the GS.
self.is_version_from_rsm = None
# Tri-state: None, True, False. Set to None if this property not specified in the GS.
self.is_vm_enabled_for_rsm_upgrades = None
self.uris = []
def __repr__(self):
return self.__str__()
def __str__(self):
return "[name: '{0}' uris: {1}]".format(self.name, self.uris)
class ExtensionState(object):
Enabled = ustr("enabled")
Disabled = ustr("disabled")
class ExtensionRequestedState(object):
"""
This is the state of the Handler as requested by the Goal State.
CRP only supports 2 states as of now - Enabled and Uninstall
Disabled was used for older XML extensions and we keep it to support backward compatibility.
"""
Enabled = ustr("enabled")
Disabled = ustr("disabled")
Uninstall = ustr("uninstall")
All = [Enabled, Disabled, Uninstall]
class ExtensionSettings(object):
"""
The runtime settings associated with a Handler
- Maps to Extension.PluginSettings.Plugin.RuntimeSettings for single config extensions in the ExtensionConfig.xml
Eg: 1.settings, 2.settings
- Maps to Extension.PluginSettings.Plugin.ExtensionRuntimeSettings for multi-config extensions in the
ExtensionConfig.xml
Eg: <extensionName>.1.settings, <extensionName>.2.settings
"""
def __init__(self,
name=None,
sequenceNumber=None,
publicSettings=None,
protectedSettings=None,
certificateThumbprint=None,
dependencyLevel=0,
state=ExtensionState.Enabled):
self.name = name
self.sequenceNumber = sequenceNumber
self.publicSettings = publicSettings
self.protectedSettings = protectedSettings
self.certificateThumbprint = certificateThumbprint
self.dependencyLevel = dependencyLevel
self.state = state
def dependency_level_sort_key(self, handler_state):
level = self.dependencyLevel
# Process uninstall or disabled before enabled, in reverse order
# Prioritize Handler state and Extension state both when sorting extensions
# remap 0 to -1, 1 to -2, 2 to -3, etc
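# Illustrative example (values assumed, not part of the protocol): for an enabled handler with
# enabled settings, levels 0, 1, 2 keep sort keys 0, 1, 2 and are processed lowest level first;
# for a disabled/uninstall handler (or disabled settings) the same levels become -1, -2, -3,
# so they sort before all enabled extensions and the highest level is processed first.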
if handler_state != ExtensionRequestedState.Enabled or self.state != ExtensionState.Enabled:
level = (0 - level) - 1
return level
def __repr__(self):
return self.__str__()
def __str__(self):
return "{0}".format(self.name)
class Extension(object):
"""
The main Plugin/handler specified by the publishers.
Maps to Extension.PluginSettings.Plugins.Plugin in the ExtensionConfig.xml file
Eg: Microsoft.OSTC.CustomScript
"""
def __init__(self, name=None):
self.name = name
self.version = None
self.state = None
self.settings = []
self.manifest_uris = []
self.supports_multi_config = False
self.__invalid_handler_setting_reason = None
@property
def is_invalid_setting(self):
return self.__invalid_handler_setting_reason is not None
@property
def invalid_setting_reason(self):
return self.__invalid_handler_setting_reason
@invalid_setting_reason.setter
def invalid_setting_reason(self, value):
self.__invalid_handler_setting_reason = value
def dependency_level_sort_key(self):
levels = [e.dependencyLevel for e in self.settings]
if len(levels) == 0:
level = 0
else:
level = min(levels)
# Process uninstall or disabled before enabled, in reverse order
# remap 0 to -1, 1 to -2, 2 to -3, etc
if self.state != u"enabled":
level = (0 - level) - 1
return level
def __repr__(self):
return self.__str__()
def __str__(self):
return "{0}-{1}".format(self.name, self.version)
class InVMGoalStateMetaData(DataContract):
"""
Object for parsing the GoalState MetaData received from CRP
Eg: <InVMGoalStateMetaData inSvdSeqNo="..." createdOnTicks="..." activityId="..." correlationId="..." />
"""
def __init__(self, in_vm_metadata_node):
self.correlation_id = getattrib(in_vm_metadata_node, "correlationId")
self.activity_id = getattrib(in_vm_metadata_node, "activityId")
self.created_on_ticks = getattrib(in_vm_metadata_node, "createdOnTicks")
self.in_svd_seq_no = getattrib(in_vm_metadata_node, "inSvdSeqNo")
class ExtHandlerPackage(DataContract):
def __init__(self, version=None):
self.version = version
self.uris = []
# TODO update the naming to align with metadata protocol
self.isinternal = False
self.disallow_major_upgrade = False
class ExtHandlerPackageList(DataContract):
def __init__(self):
self.versions = DataContractList(ExtHandlerPackage)
class VMProperties(DataContract):
def __init__(self, certificateThumbprint=None):
# TODO need to confirm the property name
self.certificateThumbprint = certificateThumbprint
class ProvisionStatus(DataContract):
def __init__(self, status=None, subStatus=None, description=None):
self.status = status
self.subStatus = subStatus
self.description = description
self.properties = VMProperties()
class ExtensionSubStatus(DataContract):
def __init__(self, name=None, status=None, code=None, message=None):
self.name = name
self.status = status
self.code = code
self.message = message
class ExtensionStatus(DataContract):
def __init__(self,
name=None,
configurationAppliedTime=None,
operation=None,
status=None,
seq_no=None,
code=None,
message=None):
self.name = name
self.configurationAppliedTime = configurationAppliedTime
self.operation = operation
self.status = status
self.sequenceNumber = seq_no
self.code = code
self.message = message
self.substatusList = DataContractList(ExtensionSubStatus)
class ExtHandlerStatus(DataContract):
def __init__(self,
name=None,
version=None,
status=None,
code=0,
message=None):
self.name = name
self.version = version
self.status = status
self.code = code
self.message = message
self.supports_multi_config = False
self.extension_status = None
class VMAgentStatus(DataContract):
def __init__(self, status=None, message=None, gs_aggregate_status=None, update_status=None):
self.status = status
self.message = message
self.hostname = socket.gethostname()
self.version = str(CURRENT_VERSION)
self.osname = DISTRO_NAME
self.osversion = DISTRO_VERSION
self.extensionHandlers = DataContractList(ExtHandlerStatus)
self.vm_artifacts_aggregate_status = VMArtifactsAggregateStatus(gs_aggregate_status)
self.update_status = update_status
self._supports_fast_track = False
@property
def supports_fast_track(self):
return self._supports_fast_track
def set_supports_fast_track(self, value):
self._supports_fast_track = value
class VMStatus(DataContract):
def __init__(self, status, message, gs_aggregate_status=None, vm_agent_update_status=None):
self.vmAgent = VMAgentStatus(status=status, message=message, gs_aggregate_status=gs_aggregate_status,
update_status=vm_agent_update_status)
class GoalStateAggregateStatus(DataContract):
def __init__(self, seq_no, status=None, message="", code=None):
self.message = message
self.in_svd_seq_no = seq_no
self.status = status
self.code = code
self.__utc_timestamp = time.gmtime()
@property
def processed_time(self):
return self.__utc_timestamp
class VMArtifactsAggregateStatus(DataContract):
def __init__(self, gs_aggregate_status=None):
self.goal_state_aggregate_status = gs_aggregate_status
class RemoteAccessUser(DataContract):
def __init__(self, name, encrypted_password, expiration):
self.name = name
self.encrypted_password = encrypted_password
self.expiration = expiration
class RemoteAccessUsersList(DataContract):
def __init__(self):
self.users = DataContractList(RemoteAccessUser)
class VMAgentUpdateStatuses(object):
Success = ustr("Success")
Transitioning = ustr("Transitioning")
Error = ustr("Error")
Unknown = ustr("Unknown")
class VMAgentUpdateStatus(object):
def __init__(self, expected_version, status=VMAgentUpdateStatuses.Success, message="", code=0):
self.expected_version = expected_version
self.status = status
self.message = message
self.code = code
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/util.py 0000664 0000000 0000000 00000027711 14626177470 0025445 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import errno
import os
import re
import time
import threading
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
from azurelinuxagent.common.singletonperthread import SingletonPerThread
from azurelinuxagent.common.exception import ProtocolError, OSUtilError, \
ProtocolNotFoundError, DhcpError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.dhcp import get_dhcp_handler
from azurelinuxagent.common.protocol.metadata_server_migration_util import cleanup_metadata_server_artifacts, \
is_metadata_server_artifact_present
from azurelinuxagent.common.protocol.ovfenv import OvfEnv
from azurelinuxagent.common.protocol.wire import WireProtocol
from azurelinuxagent.common.utils.restutil import KNOWN_WIRESERVER_IP, \
IOErrorCounter
OVF_FILE_NAME = "ovf-env.xml"
PROTOCOL_FILE_NAME = "Protocol"
MAX_RETRY = 360
PROBE_INTERVAL = 10
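# With MAX_RETRY probes at PROBE_INTERVAL seconds each, _detect_protocol keeps trying
# for 360 * 10s = ~1 hour before raising ProtocolNotFoundError.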
ENDPOINT_FILE_NAME = "WireServerEndpoint"
PASSWORD_PATTERN = "<UserPassword>.*?<"
PASSWORD_REPLACEMENT = "<UserPassword>*<"
WIRE_PROTOCOL_NAME = "WireProtocol"
def get_protocol_util():
return ProtocolUtil()
class ProtocolUtil(SingletonPerThread):
"""
ProtocolUtil handles initialization of the protocol instance. Two protocol types
are involved: the wire protocol and the metadata protocol.
Note: ProtocolUtil is a subclass of SingletonPerThread, which means that there is only one
instance of ProtocolUtil per thread.
"""
def __init__(self):
self._lock = threading.RLock() # protects the files on disk created during protocol detection
self._protocol = None
self.endpoint = None
self.osutil = get_osutil()
self.dhcp_handler = get_dhcp_handler()
def copy_ovf_env(self):
"""
Copy the ovf-env.xml file from the DVD to the hard disk,
removing the password before saving it to disk.
"""
dvd_mount_point = conf.get_dvd_mount_point()
ovf_file_path_on_dvd = os.path.join(dvd_mount_point, OVF_FILE_NAME)
ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
try:
self.osutil.mount_dvd()
except OSUtilError as e:
raise ProtocolError("[CopyOvfEnv] Error mounting dvd: "
"{0}".format(ustr(e)))
try:
ovfxml = fileutil.read_file(ovf_file_path_on_dvd, remove_bom=True)
ovfenv = OvfEnv(ovfxml)
except (IOError, OSError) as e:
raise ProtocolError("[CopyOvfEnv] Error reading file "
"{0}: {1}".format(ovf_file_path_on_dvd,
ustr(e)))
try:
ovfxml = re.sub(PASSWORD_PATTERN,
PASSWORD_REPLACEMENT,
ovfxml)
fileutil.write_file(ovf_file_path, ovfxml)
except (IOError, OSError) as e:
raise ProtocolError("[CopyOvfEnv] Error writing file "
"{0}: {1}".format(ovf_file_path,
ustr(e)))
self._cleanup_ovf_dvd()
return ovfenv
def _cleanup_ovf_dvd(self):
try:
self.osutil.umount_dvd()
self.osutil.eject_dvd()
except OSUtilError as e:
logger.warn(ustr(e))
def get_ovf_env(self):
"""
Load saved ovf-env.xml
"""
ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
if os.path.isfile(ovf_file_path):
xml_text = fileutil.read_file(ovf_file_path)
return OvfEnv(xml_text)
else:
raise ProtocolError(
"ovf-env.xml is missing from {0}".format(ovf_file_path))
def _get_protocol_file_path(self):
return os.path.join(
conf.get_lib_dir(),
PROTOCOL_FILE_NAME)
def _get_wireserver_endpoint_file_path(self):
return os.path.join(
conf.get_lib_dir(),
ENDPOINT_FILE_NAME)
def get_wireserver_endpoint(self):
self._lock.acquire()
try:
if self.endpoint:
return self.endpoint
file_path = self._get_wireserver_endpoint_file_path()
if os.path.isfile(file_path):
try:
self.endpoint = fileutil.read_file(file_path)
if self.endpoint:
logger.info("WireServer endpoint {0} read from file", self.endpoint)
return self.endpoint
logger.error("[GetWireserverEndpoint] Unexpected empty file {0}", file_path)
except (IOError, OSError) as e:
logger.error("[GetWireserverEndpoint] Error reading file {0}: {1}", file_path, str(e))
else:
logger.error("[GetWireserverEndpoint] Missing file {0}", file_path)
self.endpoint = KNOWN_WIRESERVER_IP
logger.info("Using hardcoded Wireserver endpoint {0}", self.endpoint)
return self.endpoint
finally:
self._lock.release()
def _set_wireserver_endpoint(self, endpoint):
try:
self.endpoint = endpoint
file_path = self._get_wireserver_endpoint_file_path()
fileutil.write_file(file_path, endpoint)
except (IOError, OSError) as e:
raise OSUtilError(ustr(e))
def _clear_wireserver_endpoint(self):
"""
Cleanup previous saved wireserver endpoint.
"""
self.endpoint = None
endpoint_file_path = self._get_wireserver_endpoint_file_path()
if not os.path.isfile(endpoint_file_path):
return
try:
os.remove(endpoint_file_path)
except (IOError, OSError) as e:
# Ignore file-not-found errors (since the file is being removed)
if e.errno == errno.ENOENT:
return
logger.error("Failed to clear wiresever endpoint: {0}", e)
def _detect_protocol(self, save_to_history, init_goal_state=True):
"""
Probe protocol endpoints in turn.
"""
self.clear_protocol()
for retry in range(0, MAX_RETRY):
try:
endpoint = self.dhcp_handler.endpoint
if endpoint is None:
# Check if DHCP can be used to get the wire protocol endpoint
dhcp_available = self.osutil.is_dhcp_available()
if dhcp_available:
logger.info("WireServer endpoint is not found. Rerun dhcp handler")
try:
self.dhcp_handler.run()
except DhcpError as e:
raise ProtocolError(ustr(e))
endpoint = self.dhcp_handler.endpoint
else:
logger.info("_detect_protocol: DHCP not available")
endpoint = self.get_wireserver_endpoint()
try:
protocol = WireProtocol(endpoint)
protocol.detect(init_goal_state=init_goal_state, save_to_history=save_to_history)
self._set_wireserver_endpoint(endpoint)
return protocol
except ProtocolError as e:
logger.info("WireServer is not responding. Reset dhcp endpoint")
self.dhcp_handler.endpoint = None
self.dhcp_handler.skip_cache = True
raise e
except ProtocolError as e:
logger.info("Protocol endpoint not found: {0}", e)
if retry < MAX_RETRY - 1:
logger.info("Retry detect protocol: retry={0}", retry)
time.sleep(PROBE_INTERVAL)
raise ProtocolNotFoundError("No protocol found.")
def _save_protocol(self, protocol_name):
"""
Save protocol endpoint
"""
protocol_file_path = self._get_protocol_file_path()
try:
fileutil.write_file(protocol_file_path, protocol_name)
except (IOError, OSError) as e:
logger.error("Failed to save protocol endpoint: {0}", e)
def clear_protocol(self):
"""
Cleanup previous saved protocol endpoint.
"""
self._lock.acquire()
try:
logger.info("Clean protocol and wireserver endpoint")
self._clear_wireserver_endpoint()
self._protocol = None
protocol_file_path = self._get_protocol_file_path()
if not os.path.isfile(protocol_file_path):
return
try:
os.remove(protocol_file_path)
except (IOError, OSError) as e:
# Ignore file-not-found errors (since the file is being removed)
if e.errno == errno.ENOENT:
return
logger.error("Failed to clear protocol endpoint: {0}", e)
finally:
self._lock.release()
def get_protocol(self, init_goal_state=True, save_to_history=False):
"""
Detect protocol by endpoint.
:returns: protocol instance
"""
self._lock.acquire()
try:
if self._protocol is not None:
return self._protocol
# If the protocol file contains MetadataProtocol we need to fall through to
# _detect_protocol so that we can generate the WireServer transport certificates.
protocol_file_path = self._get_protocol_file_path()
if os.path.isfile(protocol_file_path) and fileutil.read_file(protocol_file_path) == WIRE_PROTOCOL_NAME:
endpoint = self.get_wireserver_endpoint()
self._protocol = WireProtocol(endpoint)
# If metadataserver certificates are present we clean certificates
# and remove MetadataServer firewall rule. It is possible
# there was a previous intermediate upgrade before 2.2.48 but metadata artifacts
# were not cleaned up (intermediate updated agent does not have cleanup
# logic but we transitioned from Metadata to Wire protocol)
if is_metadata_server_artifact_present():
cleanup_metadata_server_artifacts(self.osutil)
return self._protocol
logger.info("Detect protocol endpoint")
protocol = self._detect_protocol(save_to_history=save_to_history, init_goal_state=init_goal_state)
IOErrorCounter.set_protocol_endpoint(endpoint=protocol.get_endpoint())
self._save_protocol(WIRE_PROTOCOL_NAME)
self._protocol = protocol
# Need to clean up MDS artifacts only after _detect_protocol so that we don't
# delete MDS certificates if we can't reach WireServer and have to roll back
# the update
if is_metadata_server_artifact_present():
cleanup_metadata_server_artifacts(self.osutil)
return self._protocol
finally:
self._lock.release()
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/protocol/wire.py 0000664 0000000 0000000 00000145514 14626177470 0025440 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import json
import os
import random
import shutil
import time
import zipfile
from collections import defaultdict
from datetime import datetime, timedelta
from xml.sax import saxutils
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_crp, SupportedFeatureNames
from azurelinuxagent.common.datacontract import validate_param
from azurelinuxagent.common.event import add_event, WALAEventOperation, report_event, \
CollectOrReportEventDebugInfo, add_periodic
from azurelinuxagent.common.exception import ProtocolNotFoundError, \
ResourceGoneError, ExtensionDownloadError, InvalidContainerError, ProtocolError, HttpError, ExtensionErrorCodes
from azurelinuxagent.common.future import httpclient, bytebuffer, ustr
from azurelinuxagent.common.protocol.goal_state import GoalState, TRANSPORT_CERT_FILE_NAME, TRANSPORT_PRV_FILE_NAME, \
GoalStateProperties
from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol
from azurelinuxagent.common.protocol.restapi import DataContract, ProvisionStatus, VMInfo, VMStatus
from azurelinuxagent.common.telemetryevent import GuestAgentExtensionEventsSchema
from azurelinuxagent.common.utils import fileutil, restutil
from azurelinuxagent.common.utils.cryptutil import CryptUtil
from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, \
findtext, gettext, remove_bom, get_bytes_from_pem, parse_json
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
VERSION_INFO_URI = "http://{0}/?comp=versions"
HEALTH_REPORT_URI = "http://{0}/machine?comp=health"
ROLE_PROP_URI = "http://{0}/machine?comp=roleProperties"
TELEMETRY_URI = "http://{0}/machine?comp=telemetrydata"
PROTOCOL_VERSION = "2012-11-30"
ENDPOINT_FINE_NAME = "WireServer"
SHORT_WAITING_INTERVAL = 1 # 1 second
MAX_EVENT_BUFFER_SIZE = 2 ** 16 - 2 ** 10
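# 2 ** 16 - 2 ** 10 = 65536 - 1024 = 64512 bytes, i.e. a 64 KB buffer with 1 KB of headroom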
_DOWNLOAD_TIMEOUT = timedelta(minutes=5)
class UploadError(HttpError):
pass
class WireProtocol(DataContract):
def __init__(self, endpoint):
if endpoint is None:
raise ProtocolError("WireProtocol endpoint is None")
self.client = WireClient(endpoint)
def detect(self, init_goal_state=True, save_to_history=False):
self.client.check_wire_protocol_version()
trans_prv_file = os.path.join(conf.get_lib_dir(),
TRANSPORT_PRV_FILE_NAME)
trans_cert_file = os.path.join(conf.get_lib_dir(),
TRANSPORT_CERT_FILE_NAME)
cryptutil = CryptUtil(conf.get_openssl_cmd())
cryptutil.gen_transport_cert(trans_prv_file, trans_cert_file)
# Initialize the goal state, including all the inner properties
if init_goal_state:
logger.info('Initializing goal state during protocol detection')
self.client.reset_goal_state(save_to_history=save_to_history)
def update_host_plugin_from_goal_state(self):
self.client.update_host_plugin_from_goal_state()
def get_endpoint(self):
return self.client.get_endpoint()
def get_vminfo(self):
goal_state = self.client.get_goal_state()
hosting_env = self.client.get_hosting_env()
vminfo = VMInfo()
vminfo.subscriptionId = None
vminfo.vmName = hosting_env.vm_name
vminfo.tenantName = hosting_env.deployment_name
vminfo.roleName = hosting_env.role_name
vminfo.roleInstanceName = goal_state.role_instance_id
return vminfo
def get_certs(self):
certificates = self.client.get_certs()
return certificates.cert_list
def get_goal_state(self):
return self.client.get_goal_state()
def report_provision_status(self, provision_status):
validate_param("provision_status", provision_status, ProvisionStatus)
if provision_status.status is not None:
self.client.report_health(provision_status.status,
provision_status.subStatus,
provision_status.description)
if provision_status.properties.certificateThumbprint is not None:
thumbprint = provision_status.properties.certificateThumbprint
self.client.report_role_prop(thumbprint)
def report_vm_status(self, vm_status):
validate_param("vm_status", vm_status, VMStatus)
self.client.status_blob.set_vm_status(vm_status)
self.client.upload_status_blob()
def report_event(self, events_iterator):
self.client.report_event(events_iterator)
def upload_logs(self, logs):
self.client.upload_logs(logs)
def get_status_blob_data(self):
return self.client.status_blob.data
def _build_role_properties(container_id, role_instance_id, thumbprint):
xml = (u""
u""
u""
u"{0}"
u""
u""
u"{1}"
u""
u""
u""
u""
u""
u""
u""
u"").format(container_id, role_instance_id, thumbprint)
return xml
def _build_health_report(incarnation, container_id, role_instance_id,
status, substatus, description):
# The max description that can be sent to WireServer is 4096 bytes.
# Exceeding this max can result in a failure to report health.
# To keep this simple, we will keep a 10% buffer and trim before
# encoding the description.
if description:
max_chars_before_encoding = 3686
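# 3686 is ~90% of the 4096-byte limit: int(4096 * 0.9) = 3686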
len_before_trim = len(description)
description = description[:max_chars_before_encoding]
trimmed_char_count = len_before_trim - len(description)
if trimmed_char_count > 0:
logger.info(
'Trimmed health report description by {0} characters'.format(
trimmed_char_count
)
)
# Escape '&', '<' and '>'
description = saxutils.escape(ustr(description))
detail = u''
if substatus is not None:
substatus = saxutils.escape(ustr(substatus))
detail = (u""
u"{0}"
u"{1}"
u" ").format(substatus, description)
xml = (u""
u""
u"{0}"
u""
u"{1}"
u""
u""
u"{2}"
u""
u"{3}"
u"{4}"
u""
u""
u""
u""
u""
u"").format(incarnation,
container_id,
role_instance_id,
status,
detail)
return xml
def ga_status_to_guest_info(ga_status):
"""
Convert VMStatus object to status blob format
"""
v1_ga_guest_info = {
"computerName": ga_status.hostname,
"osName": ga_status.osname,
"osVersion": ga_status.osversion,
"version": ga_status.version,
}
return v1_ga_guest_info
def __get_formatted_msg_for_status_reporting(msg, lang="en-US"):
return {
'lang': lang,
'message': msg
}
def _get_utc_timestamp_for_status_reporting(time_format="%Y-%m-%dT%H:%M:%SZ", timestamp=None):
timestamp = time.gmtime() if timestamp is None else timestamp
return time.strftime(time_format, timestamp)
def ga_status_to_v1(ga_status):
v1_ga_status = {
"version": ga_status.version,
"status": ga_status.status,
"formattedMessage": __get_formatted_msg_for_status_reporting(ga_status.message)
}
if ga_status.update_status is not None:
v1_ga_status["updateStatus"] = get_ga_update_status_to_v1(ga_status.update_status)
return v1_ga_status
def get_ga_update_status_to_v1(update_status):
v1_ga_update_status = {
"expectedVersion": update_status.expected_version,
"status": update_status.status,
"code": update_status.code,
"formattedMessage": __get_formatted_msg_for_status_reporting(update_status.message)
}
return v1_ga_update_status
def ext_substatus_to_v1(sub_status_list):
status_list = []
for substatus in sub_status_list:
status = {
"name": substatus.name,
"status": substatus.status,
"code": substatus.code,
"formattedMessage": __get_formatted_msg_for_status_reporting(substatus.message)
}
status_list.append(status)
return status_list
def ext_status_to_v1(ext_status):
if ext_status is None:
return None
timestamp = _get_utc_timestamp_for_status_reporting()
v1_sub_status = ext_substatus_to_v1(ext_status.substatusList)
v1_ext_status = {
"status": {
"name": ext_status.name,
"configurationAppliedTime": ext_status.configurationAppliedTime,
"operation": ext_status.operation,
"status": ext_status.status,
"code": ext_status.code,
"formattedMessage": __get_formatted_msg_for_status_reporting(ext_status.message)
},
"version": 1.0,
"timestampUTC": timestamp
}
if len(v1_sub_status) != 0:
v1_ext_status['status']['substatus'] = v1_sub_status
return v1_ext_status
def ext_handler_status_to_v1(ext_handler_status):
v1_handler_status = {
'handlerVersion': ext_handler_status.version,
'handlerName': ext_handler_status.name,
'status': ext_handler_status.status,
'code': ext_handler_status.code,
'useExactVersion': True
}
if ext_handler_status.message is not None:
v1_handler_status["formattedMessage"] = __get_formatted_msg_for_status_reporting(ext_handler_status.message)
v1_ext_status = ext_status_to_v1(ext_handler_status.extension_status)
if ext_handler_status.extension_status is not None and v1_ext_status is not None:
v1_handler_status["runtimeSettingsStatus"] = {
'settingsStatus': v1_ext_status,
'sequenceNumber': ext_handler_status.extension_status.sequenceNumber
}
# Add extension name if Handler supports MultiConfig
if ext_handler_status.supports_multi_config:
v1_handler_status["runtimeSettingsStatus"]["extensionName"] = ext_handler_status.extension_status.name
return v1_handler_status
def vm_artifacts_aggregate_status_to_v1(vm_artifacts_aggregate_status):
gs_aggregate_status = vm_artifacts_aggregate_status.goal_state_aggregate_status
if gs_aggregate_status is None:
return None
v1_goal_state_aggregate_status = {
"formattedMessage": __get_formatted_msg_for_status_reporting(gs_aggregate_status.message),
"timestampUTC": _get_utc_timestamp_for_status_reporting(timestamp=gs_aggregate_status.processed_time),
"inSvdSeqNo": gs_aggregate_status.in_svd_seq_no,
"status": gs_aggregate_status.status,
"code": gs_aggregate_status.code
}
v1_artifact_aggregate_status = {
"goalStateAggregateStatus": v1_goal_state_aggregate_status
}
return v1_artifact_aggregate_status
def vm_status_to_v1(vm_status):
timestamp = _get_utc_timestamp_for_status_reporting()
v1_ga_guest_info = ga_status_to_guest_info(vm_status.vmAgent)
v1_ga_status = ga_status_to_v1(vm_status.vmAgent)
v1_vm_artifact_aggregate_status = vm_artifacts_aggregate_status_to_v1(
vm_status.vmAgent.vm_artifacts_aggregate_status)
v1_handler_status_list = []
for handler_status in vm_status.vmAgent.extensionHandlers:
v1_handler_status_list.append(ext_handler_status_to_v1(handler_status))
v1_agg_status = {
'guestAgentStatus': v1_ga_status,
'handlerAggregateStatus': v1_handler_status_list
}
if v1_vm_artifact_aggregate_status is not None:
v1_agg_status['vmArtifactsAggregateStatus'] = v1_vm_artifact_aggregate_status
v1_vm_status = {
'version': '1.1',
'timestampUTC': timestamp,
'aggregateStatus': v1_agg_status,
'guestOSInfo': v1_ga_guest_info
}
supported_features = []
for _, feature in get_agent_supported_features_list_for_crp().items():
supported_features.append(
{
"Key": feature.name,
"Value": feature.version
}
)
if vm_status.vmAgent.supports_fast_track:
supported_features.append(
{
"Key": SupportedFeatureNames.FastTrack,
"Value": "1.0" # This is a dummy version; CRP ignores it
}
)
if supported_features:
v1_vm_status["supportedFeatures"] = supported_features
return v1_vm_status
class StatusBlob(object):
def __init__(self, client):
self.vm_status = None
self.client = client
self.type = None
self.data = None
def set_vm_status(self, vm_status):
validate_param("vmAgent", vm_status, VMStatus)
self.vm_status = vm_status
def to_json(self):
report = vm_status_to_v1(self.vm_status)
return json.dumps(report)
__storage_version__ = "2014-02-14"
def prepare(self, blob_type):
logger.verbose("Prepare status blob")
self.data = self.to_json()
self.type = blob_type
def upload(self, url):
try:
if self.type not in ["BlockBlob", "PageBlob"]:
raise ProtocolError("Illegal blob type: {0}".format(self.type))
if self.type == "BlockBlob":
self.put_block_blob(url, self.data)
else:
self.put_page_blob(url, self.data)
return True
except Exception as e:
logger.verbose("Initial status upload failed: {0}", e)
return False
def get_block_blob_headers(self, blob_size):
return {
"Content-Length": ustr(blob_size),
"x-ms-blob-type": "BlockBlob",
"x-ms-date": _get_utc_timestamp_for_status_reporting(),
"x-ms-version": self.__class__.__storage_version__
}
def put_block_blob(self, url, data):
logger.verbose("Put block blob")
headers = self.get_block_blob_headers(len(data))
resp = self.client.call_storage_service(restutil.http_put, url, data, headers)
if resp.status != httpclient.CREATED:
raise UploadError(
"Failed to upload block blob: {0}".format(resp.status))
def get_page_blob_create_headers(self, blob_size):
return {
"Content-Length": "0",
"x-ms-blob-content-length": ustr(blob_size),
"x-ms-blob-type": "PageBlob",
"x-ms-date": _get_utc_timestamp_for_status_reporting(),
"x-ms-version": self.__class__.__storage_version__
}
def get_page_blob_page_headers(self, start, end):
return {
"Content-Length": ustr(end - start),
"x-ms-date": _get_utc_timestamp_for_status_reporting(),
"x-ms-range": "bytes={0}-{1}".format(start, end - 1),
"x-ms-page-write": "update",
"x-ms-version": self.__class__.__storage_version__
}
def put_page_blob(self, url, data):
logger.verbose("Put page blob")
# Convert string into bytes and align to 512 bytes
data = bytearray(data, encoding='utf-8')
page_blob_size = int((len(data) + 511) / 512) * 512
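# Round up to the next 512-byte boundary, e.g. 1000 -> 1024, 1024 -> 1024, 1025 -> 1536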
headers = self.get_page_blob_create_headers(page_blob_size)
resp = self.client.call_storage_service(restutil.http_put, url, "", headers)
if resp.status != httpclient.CREATED:
raise UploadError(
"Failed to clean up page blob: {0}".format(resp.status))
if url.count("?") <= 0:
url = "{0}?comp=page".format(url)
else:
url = "{0}&comp=page".format(url)
logger.verbose("Upload page blob")
page_max = 4 * 1024 * 1024 # Max page size: 4MB
start = 0
end = 0
while end < len(data):
end = min(len(data), start + page_max)
content_size = end - start
# Align to 512 bytes
page_end = int((end + 511) / 512) * 512
buf_size = page_end - start
buf = bytearray(buf_size)
buf[0: content_size] = data[start: end]
headers = self.get_page_blob_page_headers(start, page_end)
resp = self.client.call_storage_service(
restutil.http_put,
url,
bytebuffer(buf),
headers)
if resp is None or resp.status != httpclient.CREATED:
raise UploadError(
"Failed to upload page blob: {0}".format(resp.status))
start = end
def event_param_to_v1(param):
param_format = ustr('<Param Name="{0}" Value={1} T="{2}" />')
param_type = type(param.value)
attr_type = ""
if param_type is int:
attr_type = 'mt:uint64'
elif param_type is str:
attr_type = 'mt:wstr'
elif ustr(param_type).count("'unicode'") > 0:
attr_type = 'mt:wstr'
elif param_type is bool:
attr_type = 'mt:bool'
elif param_type is float:
attr_type = 'mt:float64'
return param_format.format(param.name,
saxutils.quoteattr(ustr(param.value)),
attr_type)
def event_to_v1_encoded(event, encoding='utf-8'):
params = ""
for param in event.parameters:
params += event_param_to_v1(param)
event_str = ustr('<Event id="{0}"><![CDATA[{1}]]></Event>').format(event.eventId, params)
return event_str.encode(encoding)
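# For illustration (assuming the formats above; the values are made up), one serialized event looks like:
#   <Event id="1"><![CDATA[<Param Name="Name" Value="WALinuxAgent" T="mt:wstr" /><Param Name="OperationSuccess" Value="True" T="mt:bool" />]]></Event>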
class WireClient(object):
def __init__(self, endpoint):
logger.info("Wire server endpoint:{0}", endpoint)
self._endpoint = endpoint
self._goal_state = None
self._host_plugin = None
self.status_blob = StatusBlob(self)
def get_endpoint(self):
return self._endpoint
def call_wireserver(self, http_req, *args, **kwargs):
try:
# Never use the HTTP proxy for wireserver
kwargs['use_proxy'] = False
resp = http_req(*args, **kwargs)
if restutil.request_failed(resp):
msg = "[Wireserver Failed] URI {0} ".format(args[0])
if resp is not None:
msg += " [HTTP Failed] Status Code {0}".format(resp.status)
raise ProtocolError(msg)
# If the GoalState is stale, pass along the exception to the caller
except ResourceGoneError:
raise
except Exception as e:
raise ProtocolError("[Wireserver Exception] {0}".format(ustr(e)))
return resp
def decode_config(self, data):
if data is None:
return None
data = remove_bom(data)
xml_text = ustr(data, encoding='utf-8')
return xml_text
def fetch_config(self, uri, headers):
resp = self.call_wireserver(restutil.http_get, uri, headers=headers)
return self.decode_config(resp.read())
@staticmethod
def call_storage_service(http_req, *args, **kwargs):
# Default to use the configured HTTP proxy
if 'use_proxy' not in kwargs or kwargs['use_proxy'] is None:
kwargs['use_proxy'] = True
return http_req(*args, **kwargs)
def fetch_artifacts_profile_blob(self, uri):
return self._fetch_content("artifacts profile blob", [uri], use_verify_header=False)[1] # _fetch_content returns a (uri, content) tuple
def fetch_manifest(self, manifest_type, uris, use_verify_header):
uri, content = self._fetch_content("{0} manifest".format(manifest_type), uris, use_verify_header=use_verify_header)
self.get_host_plugin().update_manifest_uri(uri)
return content
def _fetch_content(self, download_type, uris, use_verify_header):
"""
Walks the given list of 'uris' issuing HTTP GET requests; returns a tuple with the URI and the content of the first successful request.
The 'download_type' is added to any log messages produced by this method; it should describe the type of content of the given URIs
(e.g. "manifest", "extension package", etc).
"""
host_ga_plugin = self.get_host_plugin()
direct_download = lambda uri: self.fetch(uri)[0]
def hgap_download(uri):
request_uri, request_headers = host_ga_plugin.get_artifact_request(uri, use_verify_header=use_verify_header)
response, _ = self.fetch(request_uri, request_headers, use_proxy=False, retry_codes=restutil.HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES)
return response
return self._download_with_fallback_channel(download_type, uris, direct_download=direct_download, hgap_download=hgap_download)
def download_zip_package(self, package_type, uris, target_file, target_directory, use_verify_header):
"""
Downloads the ZIP package specified in 'uris' (which is a list of alternate locations for the ZIP), saving it to 'target_file' and then expanding
its contents to 'target_directory'. Deletes the target file after it has been expanded.
The 'package_type' is only used in log messages and has no other semantics. It should specify the contents of the ZIP, e.g. "extension package"
or "agent package"
The 'use_verify_header' parameter indicates whether the verify header should be added when using the extensionArtifact API of the HostGAPlugin.
"""
host_ga_plugin = self.get_host_plugin()
direct_download = lambda uri: self.stream(uri, target_file, headers=None, use_proxy=True)
def hgap_download(uri):
request_uri, request_headers = host_ga_plugin.get_artifact_request(uri, use_verify_header=use_verify_header, artifact_manifest_url=host_ga_plugin.manifest_uri)
return self.stream(request_uri, target_file, headers=request_headers, use_proxy=False)
on_downloaded = lambda: WireClient._try_expand_zip_package(package_type, target_file, target_directory)
self._download_with_fallback_channel(package_type, uris, direct_download=direct_download, hgap_download=hgap_download, on_downloaded=on_downloaded)
def _download_with_fallback_channel(self, download_type, uris, direct_download, hgap_download, on_downloaded=None):
"""
Walks the given list of 'uris' issuing HTTP GET requests, attempting to download the content of each URI. The download is done using both the default and
the fallback channels, until one of them succeeds. The 'direct_download' and 'hgap_download' functions define the logic to do direct calls to the URI or
to use the HostGAPlugin as a proxy for the download. Initially the default channel is the direct download and the fallback channel is the HostGAPlugin,
but the default can change depending on the success/failure of each channel (see _download_using_appropriate_channel() for the logic to do this).
The 'download_type' is added to any log messages produced by this method; it should describe the type of content of the given URIs
(e.g. "manifest", "extension package", "agent package", etc.).
When the download is successful, _download_with_fallback_channel invokes the 'on_downloaded' function, which can be used to process the results of the
download (e.g. to expand a ZIP package). If 'on_downloaded' raises an exception, the download is considered a failure and the next URI is tried.
When the download succeeds, this method returns a (uri, response) tuple where the first item is the URI of the successful download and the second item is
the response returned by the successful channel (i.e. one of direct_download and hgap_download).
This method enforces a timeout (_DOWNLOAD_TIMEOUT) on the download and raises an exception if the limit is exceeded.
"""
logger.info("Downloading {0}", download_type)
start_time = datetime.now()
uris_shuffled = list(uris)  # copy before shuffling so the caller's list is not reordered
random.shuffle(uris_shuffled)
most_recent_error = "None"
for index, uri in enumerate(uris_shuffled):
elapsed = datetime.now() - start_time
if elapsed > _DOWNLOAD_TIMEOUT:
message = "Timeout downloading {0}. Elapsed: {1} URIs tried: {2}/{3}. Last error: {4}".format(download_type, elapsed, index, len(uris), ustr(most_recent_error))
raise ExtensionDownloadError(message, code=ExtensionErrorCodes.PluginManifestDownloadError)
try:
# Disable W0640: OK to use uri in a lambda within the loop's body
response = self._download_using_appropriate_channel(lambda: direct_download(uri), lambda: hgap_download(uri)) # pylint: disable=W0640
if on_downloaded is not None:
on_downloaded()
return uri, response
except Exception as exception:
most_recent_error = exception
raise ExtensionDownloadError("Failed to download {0} from all URIs. Last error: {1}".format(download_type, ustr(most_recent_error)), code=ExtensionErrorCodes.PluginManifestDownloadError)
@staticmethod
def _try_expand_zip_package(package_type, target_file, target_directory):
logger.info("Unzipping {0}: {1}", package_type, target_file)
try:
zipfile.ZipFile(target_file).extractall(target_directory)
except Exception as exception:
logger.error("Error while unzipping {0}: {1}", package_type, ustr(exception))
if os.path.exists(target_directory):
try:
shutil.rmtree(target_directory)
except Exception as exception:
logger.warn("Cannot delete {0}: {1}", target_directory, ustr(exception))
raise
finally:
try:
os.remove(target_file)
except Exception as exception:
logger.warn("Cannot delete {0}: {1}", target_file, ustr(exception))
def stream(self, uri, destination, headers=None, use_proxy=None):
"""
Downloads the content of the given 'uri' and saves it to the 'destination' file.
"""
try:
logger.verbose("Fetch [{0}] with headers [{1}] to file [{2}]", uri, headers, destination)
response = self._fetch_response(uri, headers, use_proxy)
if response is not None and not restutil.request_failed(response):
chunk_size = 1024 * 1024 # 1MB buffer
with open(destination, 'wb', chunk_size) as destination_fh:
complete = False
while not complete:
chunk = response.read(chunk_size)
destination_fh.write(chunk)
complete = len(chunk) < chunk_size
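# A read that returns fewer bytes than requested (including zero) signals end of stream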
return ""
except:
if os.path.exists(destination): # delete the destination file, in case we did a partial download
try:
os.remove(destination)
except Exception as exception:
logger.warn("Can't delete {0}: {1}", destination, ustr(exception))
raise
def fetch(self, uri, headers=None, use_proxy=None, decode=True, retry_codes=None, ok_codes=None):
"""
Returns a tuple with the content and headers of the response. The headers are a list of (name, value) tuples.
"""
logger.verbose("Fetch [{0}] with headers [{1}]", uri, headers)
content = None
response_headers = None
response = self._fetch_response(uri, headers, use_proxy, retry_codes=retry_codes, ok_codes=ok_codes)
if response is not None and not restutil.request_failed(response, ok_codes=ok_codes):
response_content = response.read()
content = self.decode_config(response_content) if decode else response_content
response_headers = response.getheaders()
return content, response_headers
def _fetch_response(self, uri, headers=None, use_proxy=None, retry_codes=None, ok_codes=None):
resp = None
try:
resp = self.call_storage_service(
restutil.http_get,
uri,
headers=headers,
use_proxy=use_proxy,
retry_codes=retry_codes)
host_plugin = self.get_host_plugin()
if restutil.request_failed(resp, ok_codes=ok_codes):
error_response = restutil.read_response_error(resp)
msg = "Fetch failed from [{0}]: {1}".format(uri, error_response)
logger.warn(msg)
if host_plugin is not None:
host_plugin.report_fetch_health(uri,
is_healthy=not restutil.request_failed_at_hostplugin(resp),
source='WireClient',
response=error_response)
raise ProtocolError(msg)
else:
if host_plugin is not None:
host_plugin.report_fetch_health(uri, source='WireClient')
except (HttpError, ProtocolError, IOError) as error:
msg = "Fetch failed: {0}".format(error)
logger.warn(msg)
report_event(op=WALAEventOperation.HttpGet, is_success=False, message=msg, log_event=False)
raise
return resp
def update_host_plugin_from_goal_state(self):
"""
Fetches a new goal state and updates the Container ID and Role Config Name of the host plugin client
"""
if self._host_plugin is not None:
GoalState.update_host_plugin_headers(self)
def update_host_plugin(self, container_id, role_config_name):
if self._host_plugin is not None:
self._host_plugin.update_container_id(container_id)
self._host_plugin.update_role_config_name(role_config_name)
def update_goal_state(self, silent=False, save_to_history=False):
"""
Updates the goal state if the incarnation or etag changed
"""
try:
if self._goal_state is None:
self._goal_state = GoalState(self, silent=silent, save_to_history=save_to_history)
else:
self._goal_state.update(silent=silent)
except ProtocolError:
raise
except Exception as exception:
raise ProtocolError("Error fetching goal state: {0}".format(ustr(exception)))
def reset_goal_state(self, goal_state_properties=GoalStateProperties.All, silent=False, save_to_history=False):
"""
Resets the goal state
"""
try:
if not silent:
logger.info("Forcing an update of the goal state.")
self._goal_state = GoalState(self, goal_state_properties=goal_state_properties, silent=silent, save_to_history=save_to_history)
except ProtocolError:
raise
except Exception as exception:
raise ProtocolError("Error fetching goal state: {0}".format(ustr(exception)))
def get_goal_state(self):
if self._goal_state is None:
raise ProtocolError("Trying to fetch goal state before initialization!")
return self._goal_state
def get_hosting_env(self):
if self._goal_state is None:
raise ProtocolError("Trying to fetch Hosting Environment before initialization!")
return self._goal_state.hosting_env
def get_shared_conf(self):
if self._goal_state is None:
raise ProtocolError("Trying to fetch Shared Conf before initialization!")
return self._goal_state.shared_conf
def get_certs(self):
if self._goal_state is None:
raise ProtocolError("Trying to fetch Certificates before initialization!")
return self._goal_state.certs
def get_remote_access(self):
if self._goal_state is None:
raise ProtocolError("Trying to fetch Remote Access before initialization!")
return self._goal_state.remote_access
def check_wire_protocol_version(self):
uri = VERSION_INFO_URI.format(self.get_endpoint())
version_info_xml = self.fetch_config(uri, None)
version_info = VersionInfo(version_info_xml)
preferred = version_info.get_preferred()
if PROTOCOL_VERSION == preferred:
logger.info("Wire protocol version:{0}", PROTOCOL_VERSION)
elif PROTOCOL_VERSION in version_info.get_supported():
logger.info("Wire protocol version:{0}", PROTOCOL_VERSION)
logger.info("Server preferred version:{0}", preferred)
else:
error = ("Agent supported wire protocol version: {0} was not "
"advised by Fabric.").format(PROTOCOL_VERSION)
raise ProtocolNotFoundError(error)
def _call_hostplugin_with_container_check(self, host_func):
"""
Calls host_func on host channel and accounts for stale resource (ResourceGoneError or InvalidContainerError).
If stale, it refreshes the goal state and retries host_func.
"""
try:
return host_func()
except (ResourceGoneError, InvalidContainerError) as error:
host_plugin = self.get_host_plugin()
old_container_id, old_role_config_name = host_plugin.container_id, host_plugin.role_config_name
msg = "[PERIODIC] Request failed with the current host plugin configuration. " \
"ContainerId: {0}, role config file: {1}. Fetching new goal state and retrying the call." \
"Error: {2}".format(old_container_id, old_role_config_name, ustr(error))
logger.periodic_info(logger.EVERY_SIX_HOURS, msg)
self.update_host_plugin_from_goal_state()
new_container_id, new_role_config_name = host_plugin.container_id, host_plugin.role_config_name
msg = "[PERIODIC] Host plugin reconfigured with new parameters. " \
"ContainerId: {0}, role config file: {1}.".format(new_container_id, new_role_config_name)
logger.periodic_info(logger.EVERY_SIX_HOURS, msg)
try:
ret = host_func()
msg = "[PERIODIC] Request succeeded using the host plugin channel after goal state refresh. " \
"ContainerId changed from {0} to {1}, " \
"role config file changed from {2} to {3}.".format(old_container_id, new_container_id,
old_role_config_name, new_role_config_name)
add_periodic(delta=logger.EVERY_SIX_HOURS,
name=AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.HostPlugin,
is_success=True,
message=msg,
log_event=True)
return ret
except (ResourceGoneError, InvalidContainerError) as error:
msg = "[PERIODIC] Request failed using the host plugin channel after goal state refresh. " \
"ContainerId changed from {0} to {1}, role config file changed from {2} to {3}. " \
"Exception type: {4}.".format(old_container_id, new_container_id, old_role_config_name,
new_role_config_name, type(error).__name__)
add_periodic(delta=logger.EVERY_SIX_HOURS,
name=AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.HostPlugin,
is_success=False,
message=msg,
log_event=True)
raise
def _download_using_appropriate_channel(self, direct_download, hgap_download):
"""
Does a download using both the default and fallback channels. By default, the primary channel is direct, host channel is the fallback.
We call the primary channel first and return on success. If primary fails, we try the fallback. If fallback fails,
we return and *don't* switch the default channel. If fallback succeeds, we change the default channel.
"""
hgap_download_function_with_retry = lambda: self._call_hostplugin_with_container_check(hgap_download)
if HostPluginProtocol.is_default_channel:
primary_channel, secondary_channel = hgap_download_function_with_retry, direct_download
else:
primary_channel, secondary_channel = direct_download, hgap_download_function_with_retry
try:
return primary_channel()
except Exception as exception:
primary_channel_error = exception
try:
return_value = secondary_channel()
# Since the secondary channel succeeded, flip the default channel
HostPluginProtocol.is_default_channel = not HostPluginProtocol.is_default_channel
message = "Default channel changed to {0} channel.".format("HostGAPlugin" if HostPluginProtocol.is_default_channel else "Direct")
logger.info(message)
add_event(AGENT_NAME, op=WALAEventOperation.DefaultChannelChange, version=CURRENT_VERSION, is_success=True, message=message, log_event=False)
return return_value
except Exception as exception:
raise HttpError("Download failed both on the primary and fallback channels. Primary: [{0}] Fallback: [{1}]".format(ustr(primary_channel_error), ustr(exception)))
def upload_status_blob(self):
extensions_goal_state = self.get_goal_state().extensions_goal_state
if extensions_goal_state.status_upload_blob is None:
# the status upload blob is in ExtensionsConfig so force a full goal state refresh
self.reset_goal_state(silent=True, save_to_history=True)
extensions_goal_state = self.get_goal_state().extensions_goal_state
if extensions_goal_state.status_upload_blob is None:
raise ProtocolNotFoundError("Status upload uri is missing")
logger.info("Refreshed the goal state to get the status upload blob. New Goal State ID: {0}", extensions_goal_state.id)
blob_type = extensions_goal_state.status_upload_blob_type
try:
self.status_blob.prepare(blob_type)
except Exception as e:
raise ProtocolError("Exception creating status blob: {0}".format(ustr(e)))
# Swap the order of use for the HostPlugin vs. the "direct" route.
# Prefer the use of HostPlugin. If HostPlugin fails fall back to the
# direct route.
#
# The code previously preferred the "direct" route always, and only fell back
# to the HostPlugin *if* there was an error. We would like to move to
# the HostPlugin for all traffic, but this is a big change. We would like
# to see how this behaves at scale, and have a fallback should things go
# wrong. This is why we try HostPlugin then direct.
try:
host = self.get_host_plugin()
host.put_vm_status(self.status_blob, extensions_goal_state.status_upload_blob, extensions_goal_state.status_upload_blob_type)
return
except ResourceGoneError:
# refresh the host plugin client and try again on the next iteration of the main loop
self.update_host_plugin_from_goal_state()
return
except Exception as e:
# for all other errors, fall back to direct
msg = "Falling back to direct upload: {0}".format(ustr(e))
self.report_status_event(msg, is_success=True)
try:
if self.status_blob.upload(extensions_goal_state.status_upload_blob):
return
except Exception as e:
msg = "Exception uploading status blob: {0}".format(ustr(e))
self.report_status_event(msg, is_success=False)
raise ProtocolError("Failed to upload status blob via either channel")
def report_role_prop(self, thumbprint):
goal_state = self.get_goal_state()
role_prop = _build_role_properties(goal_state.container_id,
goal_state.role_instance_id,
thumbprint)
role_prop = role_prop.encode("utf-8")
role_prop_uri = ROLE_PROP_URI.format(self.get_endpoint())
headers = self.get_header_for_xml_content()
try:
resp = self.call_wireserver(restutil.http_post,
role_prop_uri,
role_prop,
headers=headers)
except HttpError as e:
raise ProtocolError((u"Failed to send role properties: "
u"{0}").format(e))
if resp.status != httpclient.ACCEPTED:
raise ProtocolError((u"Failed to send role properties: "
u",{0}: {1}").format(resp.status,
resp.read()))
def report_health(self, status, substatus, description):
goal_state = self.get_goal_state()
health_report = _build_health_report(goal_state.incarnation,
goal_state.container_id,
goal_state.role_instance_id,
status,
substatus,
description)
health_report = health_report.encode("utf-8")
health_report_uri = HEALTH_REPORT_URI.format(self.get_endpoint())
headers = self.get_header_for_xml_content()
try:
# 30 retries with 10s sleep gives ~5min for wireserver updates;
# this is retried 3 times with 15s sleep before throwing a
# ProtocolError, for a total of ~15min.
resp = self.call_wireserver(restutil.http_post,
health_report_uri,
health_report,
headers=headers,
max_retry=30,
retry_delay=15)
except HttpError as e:
raise ProtocolError((u"Failed to send provision status: "
u"{0}").format(e))
if restutil.request_failed(resp):
raise ProtocolError((u"Failed to send provision status: "
u",{0}: {1}").format(resp.status,
resp.read()))
def send_encoded_event(self, provider_id, event_str, encoding='utf8'):
uri = TELEMETRY_URI.format(self.get_endpoint())
data_format_header = ustr('<?xml version="1.0"?><TelemetryData version="1.0"><Provider id="{0}">').format(
provider_id).encode(encoding)
data_format_footer = ustr('</Provider></TelemetryData>').encode(encoding)
# The event string is already encoded by the time it gets here; to avoid double encoding,
# the payload is assembled from separately encoded header, body, and footer parts.
data = data_format_header + event_str + data_format_footer
try:
header = self.get_header_for_xml_content()
# NOTE: The call to wireserver requests utf-8 encoding in the headers, but the body should not
# be encoded: some nodes in the telemetry pipeline do not support utf-8 encoding.
resp = self.call_wireserver(restutil.http_post, uri, data, header)
except HttpError as e:
raise ProtocolError("Failed to send events:{0}".format(e))
if restutil.request_failed(resp):
logger.verbose(resp.read())
raise ProtocolError(
"Failed to send events:{0}".format(resp.status))
def report_event(self, events_iterator):
buf = {}
debug_info = CollectOrReportEventDebugInfo(operation=CollectOrReportEventDebugInfo.OP_REPORT)
events_per_provider = defaultdict(int)
def _send_event(provider_id, debug_info):
try:
self.send_encoded_event(provider_id, buf[provider_id])
except UnicodeError as uni_error:
debug_info.update_unicode_error(uni_error)
except Exception as error:
debug_info.update_op_error(error)
# Group events by providerId
for event in events_iterator:
try:
if event.providerId not in buf:
buf[event.providerId] = b""
event_str = event_to_v1_encoded(event)
if len(event_str) >= MAX_EVENT_BUFFER_SIZE:
# Ignore single events that are too large to send out
details_of_event = [ustr(x.name) + ":" + ustr(x.value) for x in event.parameters if x.name in
[GuestAgentExtensionEventsSchema.Name, GuestAgentExtensionEventsSchema.Version,
GuestAgentExtensionEventsSchema.Operation,
GuestAgentExtensionEventsSchema.OperationSuccess]]
logger.periodic_warn(logger.EVERY_HALF_HOUR,
"Single event too large: {0}, with the length: {1} more than the limit({2})"
.format(str(details_of_event), len(event_str), MAX_EVENT_BUFFER_SIZE))
continue
# If buffer is full, send out the events in buffer and reset buffer
if len(buf[event.providerId] + event_str) >= MAX_EVENT_BUFFER_SIZE:
logger.verbose("No of events this request = {0}".format(events_per_provider[event.providerId]))
_send_event(event.providerId, debug_info)
buf[event.providerId] = b""
events_per_provider[event.providerId] = 0
# Add encoded events to the buffer
buf[event.providerId] = buf[event.providerId] + event_str
events_per_provider[event.providerId] += 1
except Exception as error:
logger.warn("Unexpected error when generating Events:{0}", textutil.format_exception(error))
# Send out all events left in buffer.
for provider_id in list(buf.keys()):
if buf[provider_id]:
logger.verbose("No of events this request = {0}".format(events_per_provider[provider_id]))
_send_event(provider_id, debug_info)
debug_info.report_debug_info()
def report_status_event(self, message, is_success):
report_event(op=WALAEventOperation.ReportStatus,
is_success=is_success,
message=message,
log_event=not is_success)
def get_header(self):
return {
"x-ms-agent-name": "WALinuxAgent",
"x-ms-version": PROTOCOL_VERSION
}
def get_header_for_xml_content(self):
return {
"x-ms-agent-name": "WALinuxAgent",
"x-ms-version": PROTOCOL_VERSION,
"Content-Type": "text/xml;charset=utf-8"
}
def get_header_for_cert(self):
trans_cert_file = os.path.join(conf.get_lib_dir(), TRANSPORT_CERT_FILE_NAME)
try:
content = fileutil.read_file(trans_cert_file)
except IOError as e:
raise ProtocolError("Failed to read {0}: {1}".format(trans_cert_file, e))
cert = get_bytes_from_pem(content)
return {
"x-ms-agent-name": "WALinuxAgent",
"x-ms-version": PROTOCOL_VERSION,
"x-ms-cipher-name": "DES_EDE3_CBC",
"x-ms-guest-agent-public-x509-cert": cert
}
def get_host_plugin(self):
if self._host_plugin is None:
self._host_plugin = HostPluginProtocol(self.get_endpoint())
GoalState.update_host_plugin_headers(self)
return self._host_plugin
def get_on_hold(self):
return self.get_goal_state().extensions_goal_state.on_hold
def upload_logs(self, content):
host = self.get_host_plugin()
return host.put_vm_log(content)
class VersionInfo(object):
def __init__(self, xml_text):
"""
Query endpoint server for wire protocol version.
Fail if our desired protocol version is not seen.
"""
logger.verbose("Load Version.xml")
self.parse(xml_text)
def parse(self, xml_text):
xml_doc = parse_doc(xml_text)
preferred = find(xml_doc, "Preferred")
self.preferred = findtext(preferred, "Version")
logger.info("Fabric preferred wire protocol version:{0}",
self.preferred)
self.supported = []
supported = find(xml_doc, "Supported")
supported_version = findall(supported, "Version")
for node in supported_version:
version = gettext(node)
logger.verbose("Fabric supported wire protocol version:{0}",
version)
self.supported.append(version)
def get_preferred(self):
return self.preferred
def get_supported(self):
return self.supported
# Do not extend this class
class InVMArtifactsProfile(object):
"""
Deserialized JSON representation of the InVMArtifactsProfile.
It is expected to contain the following fields:
* inVMArtifactsProfileBlobSeqNo
* profileId (optional)
* onHold (optional)
* certificateThumbprint (optional)
* encryptedHealthChecks (optional)
* encryptedApplicationProfile (optional)
"""
def __init__(self, artifacts_profile):
if not textutil.is_str_empty(artifacts_profile):
self.__dict__.update(parse_json(artifacts_profile))
def is_on_hold(self):
# hasattr() is not available in Python 2.6
if 'onHold' in self.__dict__:
return str(self.onHold).lower() == 'true' # pylint: disable=E1101
return False
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/singletonperthread.py 0000664 0000000 0000000 00000003056 14626177470 0026524 0 ustar 00root root 0000000 0000000 from threading import Lock, currentThread
class _SingletonPerThreadMetaClass(type):
""" A metaclass that creates a SingletonPerThread base class when called. """
_instances = {}
_lock = Lock()
def __call__(cls, *args, **kwargs):
with cls._lock:
obj_name = "%s__%s" % (cls.__name__, currentThread().getName()) # Object Name = className__threadName
if obj_name not in cls._instances:
cls._instances[obj_name] = super(_SingletonPerThreadMetaClass, cls).__call__(*args, **kwargs)
return cls._instances[obj_name]
class SingletonPerThread(_SingletonPerThreadMetaClass('SingleObjectPerThreadMetaClass', (object,), {})):
# This base class calls the metaclass above to create the singleton-per-thread object. It provides an
# abstraction over how to invoke the metaclass, so just inheriting this class makes the
# child class a singleton per thread (as opposed to invoking the metaclass separately for each derived class).
# More info here - https://stackoverflow.com/questions/6760685/creating-a-singleton-in-python
#
# Usage:
# Inheriting this class will create a singleton per thread for that class.
# To delete the cached object of a class, call DerivedClassName.clear() to delete the object for that thread.
# Note: If the thread dies and is recreated with the same thread name, the existing object is reused
# and no new object for the derived class is created unless DerivedClassName.clear() is called explicitly
# to delete the cache.
pass
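# A minimal usage sketch (illustrative only; the class name below is hypothetical):
#
#     class ThreadLocalCache(SingletonPerThread):
#         def __init__(self):
#             self.items = {}
#
#     first = ThreadLocalCache()
#     second = ThreadLocalCache()
#     assert first is second   # same instance within the same thread; other threads get their own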
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/telemetryevent.py 0000664 0000000 0000000 00000007330 14626177470 0025676 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2019 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.common.datacontract import DataContract, DataContractList
from azurelinuxagent.common.version import AGENT_NAME
class CommonTelemetryEventSchema(object):
# Common schema keys for GuestAgentExtensionEvents, GuestAgentGenericLogs
# and GuestAgentPerformanceCounterEvents tables in Kusto.
EventPid = "EventPid"
EventTid = "EventTid"
GAVersion = "GAVersion"
ContainerId = "ContainerId"
TaskName = "TaskName"
OpcodeName = "OpcodeName"
KeywordName = "KeywordName"
OSVersion = "OSVersion"
ExecutionMode = "ExecutionMode"
RAM = "RAM"
Processors = "Processors"
TenantName = "TenantName"
RoleName = "RoleName"
RoleInstanceName = "RoleInstanceName"
Location = "Location"
SubscriptionId = "SubscriptionId"
ResourceGroupName = "ResourceGroupName"
VMId = "VMId"
ImageOrigin = "ImageOrigin"
class GuestAgentGenericLogsSchema(CommonTelemetryEventSchema):
# GuestAgentGenericLogs table specific schema keys
EventName = "EventName"
CapabilityUsed = "CapabilityUsed"
Context1 = "Context1"
Context2 = "Context2"
Context3 = "Context3"
class GuestAgentExtensionEventsSchema(CommonTelemetryEventSchema):
# GuestAgentExtensionEvents table specific schema keys
ExtensionType = "ExtensionType"
IsInternal = "IsInternal"
Name = "Name"
Version = "Version"
Operation = "Operation"
OperationSuccess = "OperationSuccess"
Message = "Message"
Duration = "Duration"
class GuestAgentPerfCounterEventsSchema(CommonTelemetryEventSchema):
# GuestAgentPerformanceCounterEvents table specific schema keys
Category = "Category"
Counter = "Counter"
Instance = "Instance"
Value = "Value"
class TelemetryEventParam(DataContract):
def __init__(self, name=None, value=None):
self.name = name
self.value = value
def __eq__(self, other):
return isinstance(other, TelemetryEventParam) and other.name == self.name and other.value == self.value
class TelemetryEvent(DataContract):
def __init__(self, eventId=None, providerId=None):
self.eventId = eventId
self.providerId = providerId
self.parameters = DataContractList(TelemetryEventParam)
self.file_type = ""
# Checking if the particular param name is in the TelemetryEvent.
def __contains__(self, param_name):
return param_name in [param.name for param in self.parameters]
def is_extension_event(self):
# Events originating from the agent have "WALinuxAgent" as the Name parameter, or no Name
# parameter at all in the case of log and metric events. So, if the Name parameter exists and is not
# "WALinuxAgent", it is an extension event.
for param in self.parameters:
if param.name == GuestAgentExtensionEventsSchema.Name:
return param.value != AGENT_NAME
return False
def get_version(self):
for param in self.parameters:
if param.name == GuestAgentExtensionEventsSchema.Version:
return param.value
return None
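# Illustrative sketch (not part of the module; the provider id and extension name below are placeholders):
#
#     event = TelemetryEvent(eventId=1, providerId="00000000-0000-0000-0000-000000000000")
#     event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Name, "SomeExtension"))
#     GuestAgentExtensionEventsSchema.Name in event   # True, via __contains__
#     event.is_extension_event()                      # True, since Name != AGENT_NAME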
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/ 0000775 0000000 0000000 00000000000 14626177470 0023405 5 ustar 00root root 0000000 0000000 Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/__init__.py 0000664 0000000 0000000 00000001166 14626177470 0025522 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/archive.py 0000664 0000000 0000000 00000025507 14626177470 0025411 0 ustar 00root root 0000000 0000000 # Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache License.
import errno
import glob
import os
import re
import shutil
import zipfile
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.common.utils import fileutil, timeutil
# pylint: disable=W0105
"""
archive.py
The module supports the archiving of guest agent state. Guest
agent state is flushed whenever there is an incarnation change.
The flush is archived periodically (once a day).
The process works as follows whenever a new incarnation arrives.
1. Flush - move all state files to a new directory under
.../history/timestamp/.
2. Archive - enumerate all directories under .../history/timestamp
and create a .zip file named timestamp.zip. Delete the archive
directory.
3. Purge - glob the list of .zip files, sort by timestamp in descending
order, keep the first 50 results, and delete the rest.
... is the directory where the agent's state resides; by default this
is /var/lib/waagent.
The timestamp is an ISO 8601-formatted value.
"""
# pylint: enable=W0105
ARCHIVE_DIRECTORY_NAME = 'history'
# TODO: See comment in GoalStateHistory._save_placeholder and remove this code when no longer needed
_PLACEHOLDER_FILE_NAME = 'GoalState.1.xml'
# END TODO
_MAX_ARCHIVED_STATES = 50
_CACHE_PATTERNS = [
#
# Note that SharedConfig.xml is not included here; this file is used by other components (Azsec and Singularity/HPC Infiniband)
#
re.compile(r"^VmSettings\.\d+\.json$"),
re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE),
re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE),
re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE),
re.compile(r"^HostingEnvironmentConfig\.xml$", re.IGNORECASE),
re.compile(r"^RemoteAccess\.xml$", re.IGNORECASE),
re.compile(r"^waagent_status\.\d+\.json$"),
]
#
# Legacy names
# 2018-04-06T08:21:37.142697
# 2018-04-06T08:21:37.142697.zip
# 2018-04-06T08:21:37.142697_incarnation_N
# 2018-04-06T08:21:37.142697_incarnation_N.zip
# 2018-04-06T08:21:37.142697_N-M
# 2018-04-06T08:21:37.142697_N-M.zip
#
# Current names
#
# 2018-04-06T08-21-37__N-M
# 2018-04-06T08-21-37__N-M.zip
#
_ARCHIVE_BASE_PATTERN = r"\d{4}\-\d{2}\-\d{2}T\d{2}[:-]\d{2}[:-]\d{2}(\.\d+)?((_incarnation)?_+(\d+|status)(-\d+)?)?"
_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r'^{0}$'.format(_ARCHIVE_BASE_PATTERN))
_ARCHIVE_PATTERNS_ZIP = re.compile(r'^{0}\.zip$'.format(_ARCHIVE_BASE_PATTERN))
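# Illustrative check (not part of the module): both the current and the legacy names above
# match these patterns.
#
#     _ARCHIVE_PATTERNS_ZIP.match("2018-04-06T08-21-37__1-2.zip") is not None       # current name
#     _ARCHIVE_PATTERNS_ZIP.match("2018-04-06T08:21:37.142697.zip") is not None     # legacy name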
_GOAL_STATE_FILE_NAME = "GoalState.xml"
_VM_SETTINGS_FILE_NAME = "VmSettings.json"
_CERTIFICATES_FILE_NAME = "Certificates.json"
_HOSTING_ENV_FILE_NAME = "HostingEnvironmentConfig.xml"
_REMOTE_ACCESS_FILE_NAME = "RemoteAccess.xml"
_EXT_CONF_FILE_NAME = "ExtensionsConfig.xml"
_MANIFEST_FILE_NAME = "{0}.manifest.xml"
AGENT_STATUS_FILE = "waagent_status.json"
SHARED_CONF_FILE_NAME = "SharedConfig.xml"
# TODO: use @total_ordering once RHEL/CentOS and SLES 11 are EOL.
# @total_ordering first appeared in Python 2.7 and 3.2
# If there are more use cases for @total_ordering, I will
# consider re-implementing it.
class State(object):
def __init__(self, path, timestamp):
self._path = path
self._timestamp = timestamp
@property
def timestamp(self):
return self._timestamp
def delete(self):
pass
def archive(self):
pass
def __eq__(self, other):
return self._timestamp == other.timestamp
def __ne__(self, other):
return self._timestamp != other.timestamp
def __lt__(self, other):
return self._timestamp < other.timestamp
def __gt__(self, other):
return self._timestamp > other.timestamp
def __le__(self, other):
return self._timestamp <= other.timestamp
def __ge__(self, other):
return self._timestamp >= other.timestamp
class StateZip(State):
def delete(self):
os.remove(self._path)
class StateDirectory(State):
def delete(self):
shutil.rmtree(self._path)
def archive(self):
fn_tmp = "{0}.zip.tmp".format(self._path)
filename = "{0}.zip".format(self._path)
ziph = None
try:
# contextmanager for zipfile.ZipFile doesn't exist for py2.6, manually closing it
ziph = zipfile.ZipFile(fn_tmp, 'w')
for current_file in os.listdir(self._path):
full_path = os.path.join(self._path, current_file)
ziph.write(full_path, current_file, zipfile.ZIP_DEFLATED)
finally:
if ziph is not None:
ziph.close()
os.rename(fn_tmp, filename)
shutil.rmtree(self._path)
class StateArchiver(object):
def __init__(self, lib_dir):
self._source = os.path.join(lib_dir, ARCHIVE_DIRECTORY_NAME)
if not os.path.isdir(self._source):
try:
fileutil.mkdir(self._source, mode=0o700)
except IOError as exception:
if exception.errno != errno.EEXIST:
logger.warn("{0} : {1}", self._source, exception.strerror)
@staticmethod
def purge_legacy_goal_state_history():
lib_dir = conf.get_lib_dir()
for current_file in os.listdir(lib_dir):
# Don't remove the placeholder goal state file.
# TODO: See comment in GoalStateHistory._save_placeholder and remove this code when no longer needed
if current_file == _PLACEHOLDER_FILE_NAME:
continue
# END TODO
full_path = os.path.join(lib_dir, current_file)
for pattern in _CACHE_PATTERNS:
match = pattern.match(current_file)
if match is not None:
try:
os.remove(full_path)
except Exception as e:
logger.warn("Cannot delete legacy history file '{0}': {1}".format(full_path, e))
break
def archive(self):
states = self._get_archive_states()
if len(states) > 0:
# Skip the most recent goal state, since it may still be in use
for state in states[1:]:
state.archive()
def _get_archive_states(self):
states = []
for current_file in os.listdir(self._source):
full_path = os.path.join(self._source, current_file)
match = _ARCHIVE_PATTERNS_DIRECTORY.match(current_file)
if match is not None:
states.append(StateDirectory(full_path, match.group(0)))
match = _ARCHIVE_PATTERNS_ZIP.match(current_file)
if match is not None:
states.append(StateZip(full_path, match.group(0)))
states.sort(key=lambda state: os.path.getctime(state._path), reverse=True)
return states
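# Illustrative usage (not part of the module): archive every state directory except the most
# recent one, which may still be in use.
#
#     archiver = StateArchiver(conf.get_lib_dir())
#     archiver.archive()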
class GoalStateHistory(object):
def __init__(self, time, tag):
self._errors = False
timestamp = timeutil.create_history_timestamp(time)
self._root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "{0}__{1}".format(timestamp, tag) if tag is not None else timestamp)
GoalStateHistory._purge()
@staticmethod
def tag_exists(tag):
"""
Returns True when an item with the given 'tag' already exists in the history directory
"""
return len(glob.glob(os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "*_{0}".format(tag)))) > 0
def save(self, data, file_name):
try:
if not os.path.exists(self._root):
fileutil.mkdir(self._root, mode=0o700)
with open(os.path.join(self._root, file_name), "w") as handle:
handle.write(data)
except Exception as e:
if not self._errors: # report only 1 error per directory
self._errors = True
logger.warn("Failed to save {0} to the goal state history: {1} [no additional errors saving the goal state will be reported]".format(file_name, e))
_purge_error_count = 0
@staticmethod
def _purge():
"""
Delete "old" history directories and .zip archives. Old is defined as any directories or files older than the X newest ones.
"""
try:
history_root = os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME)
if not os.path.exists(history_root):
return
items = []
for current_item in os.listdir(history_root):
full_path = os.path.join(history_root, current_item)
items.append(full_path)
items.sort(key=os.path.getctime, reverse=True)
for current_item in items[_MAX_ARCHIVED_STATES:]:
if os.path.isfile(current_item):
os.remove(current_item)
else:
shutil.rmtree(current_item)
if GoalStateHistory._purge_error_count > 0:
GoalStateHistory._purge_error_count = 0
# Log a success message when we are recovering from errors.
logger.info("Successfully cleaned up the goal state history directory")
except Exception as e:
GoalStateHistory._purge_error_count += 1
if GoalStateHistory._purge_error_count < 5:
logger.warn("Failed to clean up the goal state history directory: {0}".format(e))
elif GoalStateHistory._purge_error_count == 5:
logger.warn("Failed to clean up the goal state history directory [will stop reporting these errors]: {0}".format(e))
@staticmethod
def _save_placeholder():
"""
Some internal components took a dependency on the legacy GoalState.*.xml file. We create it here while those components are updated to remove the dependency.
When removing this code, also remove the check in StateArchiver.purge_legacy_goal_state_history, and the definition of _PLACEHOLDER_FILE_NAME
"""
try:
placeholder = os.path.join(conf.get_lib_dir(), _PLACEHOLDER_FILE_NAME)
with open(placeholder, "w") as handle:
handle.write("empty placeholder file")
except Exception as e:
logger.warn("Failed to save placeholder file ({0}): {1}".format(_PLACEHOLDER_FILE_NAME, e))
def save_goal_state(self, text):
self.save(text, _GOAL_STATE_FILE_NAME)
self._save_placeholder()
def save_extensions_config(self, text):
self.save(text, _EXT_CONF_FILE_NAME)
def save_vm_settings(self, text):
self.save(text, _VM_SETTINGS_FILE_NAME)
def save_remote_access(self, text):
self.save(text, _REMOTE_ACCESS_FILE_NAME)
def save_certificates(self, text):
self.save(text, _CERTIFICATES_FILE_NAME)
def save_hosting_env(self, text):
self.save(text, _HOSTING_ENV_FILE_NAME)
def save_shared_conf(self, text):
self.save(text, SHARED_CONF_FILE_NAME)
def save_manifest(self, name, text):
self.save(text, _MANIFEST_FILE_NAME.format(name))
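# Illustrative usage (not part of the module; the tag and payload are placeholders, and a
# datetime import is assumed):
#
#     history = GoalStateHistory(datetime.datetime.utcnow(), "1")
#     history.save_goal_state("<GoalState>...</GoalState>")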
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/cryptutil.py 0000664 0000000 0000000 00000016460 14626177470 0026025 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import base64
import errno
import struct
import os.path
import subprocess
from azurelinuxagent.common.future import ustr, bytebuffer
from azurelinuxagent.common.exception import CryptError
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
DECRYPT_SECRET_CMD = "{0} cms -decrypt -inform DER -inkey {1} -in /dev/stdin"
class CryptUtil(object):
def __init__(self, openssl_cmd):
self.openssl_cmd = openssl_cmd
def gen_transport_cert(self, prv_file, crt_file):
"""
Create an SSL certificate for HTTPS communication with the endpoint server.
"""
cmd = [self.openssl_cmd, "req", "-x509", "-nodes", "-subj", "/CN=LinuxTransport",
"-days", "730", "-newkey", "rsa:2048", "-keyout", prv_file, "-out", crt_file]
try:
shellutil.run_command(cmd)
except shellutil.CommandError as cmd_err:
msg = "Failed to create {0} and {1} certificates.\n[stdout]\n{2}\n\n[stderr]\n{3}\n"\
.format(prv_file, crt_file, cmd_err.stdout, cmd_err.stderr)
logger.error(msg)
def get_pubkey_from_prv(self, file_name):
if not os.path.exists(file_name):
raise IOError(errno.ENOENT, "File not found", file_name)
# OpenSSL's pkey command may not be available on older versions so try 'rsa' first.
try:
command = [self.openssl_cmd, "rsa", "-in", file_name, "-pubout"]
return shellutil.run_command(command, log_error=False)
except shellutil.CommandError as error:
if not ("Not an RSA key" in error.stderr or "expecting an rsa key" in error.stderr):
logger.error(
"Command: [{0}], return code: [{1}], stdout: [{2}] stderr: [{3}]",
" ".join(command),
error.returncode,
error.stdout,
error.stderr)
raise
return shellutil.run_command([self.openssl_cmd, "pkey", "-in", file_name, "-pubout"], log_error=True)
def get_pubkey_from_crt(self, file_name):
if not os.path.exists(file_name):
raise IOError(errno.ENOENT, "File not found", file_name)
else:
cmd = [self.openssl_cmd, "x509", "-in", file_name, "-pubkey", "-noout"]
pub = shellutil.run_command(cmd, log_error=True)
return pub
def get_thumbprint_from_crt(self, file_name):
if not os.path.exists(file_name):
raise IOError(errno.ENOENT, "File not found", file_name)
else:
cmd = [self.openssl_cmd, "x509", "-in", file_name, "-fingerprint", "-noout"]
thumbprint = shellutil.run_command(cmd)
thumbprint = thumbprint.rstrip().split('=')[1].replace(':', '').upper()
return thumbprint
def decrypt_p7m(self, p7m_file, trans_prv_file, trans_cert_file, pem_file):
if not os.path.exists(p7m_file):
raise IOError(errno.ENOENT, "File not found", p7m_file)
elif not os.path.exists(trans_prv_file):
raise IOError(errno.ENOENT, "File not found", trans_prv_file)
else:
try:
shellutil.run_pipe([
[self.openssl_cmd, "cms", "-decrypt", "-in", p7m_file, "-inkey", trans_prv_file, "-recip", trans_cert_file],
[self.openssl_cmd, "pkcs12", "-nodes", "-password", "pass:", "-out", pem_file]])
except shellutil.CommandError as command_error:
logger.error("Failed to decrypt {0} (return code: {1})\n[stdout]\n{2}\n[stderr]\n{3}",
p7m_file, command_error.returncode, command_error.stdout, command_error.stderr)
def crt_to_ssh(self, input_file, output_file):
with open(output_file, "ab") as file_out:
cmd = ["ssh-keygen", "-i", "-m", "PKCS8", "-f", input_file]
try:
shellutil.run_command(cmd, stdout=file_out, log_error=True)
except shellutil.CommandError:
pass # nothing to do; the error is already logged
def asn1_to_ssh(self, pubkey):
lines = pubkey.split("\n")
lines = [x for x in lines if not x.startswith("----")]
base64_encoded = "".join(lines)
try:
#TODO remove pyasn1 dependency
from pyasn1.codec.der import decoder as der_decoder
der_encoded = base64.b64decode(base64_encoded)
der_encoded = der_decoder.decode(der_encoded)[0][1] # pylint: disable=unsubscriptable-object
key = der_decoder.decode(self.bits_to_bytes(der_encoded))[0]
n=key[0] # pylint: disable=unsubscriptable-object
e=key[1] # pylint: disable=unsubscriptable-object
keydata = bytearray()
keydata.extend(struct.pack('>I', len("ssh-rsa")))
keydata.extend(b"ssh-rsa")
keydata.extend(struct.pack('>I', len(self.num_to_bytes(e))))
keydata.extend(self.num_to_bytes(e))
keydata.extend(struct.pack('>I', len(self.num_to_bytes(n)) + 1))
keydata.extend(b"\0")
keydata.extend(self.num_to_bytes(n))
keydata_base64 = base64.b64encode(bytebuffer(keydata))
return ustr(b"ssh-rsa " + keydata_base64 + b"\n",
encoding='utf-8')
except ImportError as e:
raise CryptError("Failed to load pyasn1.codec.der")
def num_to_bytes(self, num):
"""
Pack a number into bytes (big-endian). Return a bytearray.
"""
result = bytearray()
while num:
result.append(num & 0xFF)
num >>= 8
result.reverse()
return result
def bits_to_bytes(self, bits):
"""
Convert an array containing bits (0/1 values) to a byte array.
"""
index = 7
byte_array = bytearray()
curr = 0
for bit in bits:
curr = curr | (bit << index)
index = index - 1
if index == -1:
byte_array.append(curr)
curr = 0
index = 7
return bytes(byte_array)
def decrypt_secret(self, encrypted_password, private_key):
try:
decoded = base64.b64decode(encrypted_password)
args = DECRYPT_SECRET_CMD.format(self.openssl_cmd, private_key).split(' ')
output = shellutil.run_command(args, input=decoded, stderr=subprocess.STDOUT, encode_input=False, encode_output=False)
return output.decode('utf-16')
except shellutil.CommandError as command_error:
raise subprocess.CalledProcessError(command_error.returncode, "openssl cms -decrypt", output=command_error.stdout)
except Exception as e:
raise CryptError("Error decoding secret", e)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/fileutil.py 0000664 0000000 0000000 00000015400 14626177470 0025574 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
"""
File operation util functions
"""
import errno as errno
import glob
import os
import pwd
import re
import shutil
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.textutil as textutil
from azurelinuxagent.common.future import ustr
KNOWN_IOERRORS = [
errno.EIO, # I/O error
errno.ENOMEM, # Out of memory
errno.ENFILE, # File table overflow
errno.EMFILE, # Too many open files
errno.ENOSPC, # Out of space
errno.ENAMETOOLONG, # Name too long
errno.ELOOP, # Too many symbolic links encountered
121 # Remote I/O error (errno.EREMOTEIO -- not present in all Python 2.7+)
]
def read_file(filepath, asbin=False, remove_bom=False, encoding='utf-8'):
"""
Read and return contents of 'filepath'.
"""
mode = 'rb'
with open(filepath, mode) as in_file:
data = in_file.read()
if data is None:
return None
if asbin:
return data
if remove_bom:
# remove bom on bytes data before it is converted into string.
data = textutil.remove_bom(data)
data = ustr(data, encoding=encoding)
return data
def write_file(filepath, contents, asbin=False, encoding='utf-8', append=False):
"""
Write 'contents' to 'filepath'.
"""
mode = "ab" if append else "wb"
data = contents
if not asbin:
data = contents.encode(encoding)
with open(filepath, mode) as out_file:
out_file.write(data)
def append_file(filepath, contents, asbin=False, encoding='utf-8'):
"""
Append 'contents' to 'filepath'.
"""
write_file(filepath, contents, asbin=asbin, encoding=encoding, append=True)
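# Illustrative round-trip (not part of the module; the path is an example only):
#
#     write_file("/tmp/example.txt", "hello")
#     append_file("/tmp/example.txt", "!")
#     read_file("/tmp/example.txt")    # 'hello!'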
def base_name(path):
head, tail = os.path.split(path) # pylint: disable=W0612
return tail
def get_line_startingwith(prefix, filepath):
"""
Return the first line from 'filepath' that starts with 'prefix', or None if no line matches.
"""
for line in read_file(filepath).split('\n'):
if line.startswith(prefix):
return line
return None
def mkdir(dirpath, mode=None, owner=None, reset_mode_and_owner=True):
if not os.path.isdir(dirpath):
os.makedirs(dirpath)
reset_mode_and_owner = True # force setting the mode and owner
if reset_mode_and_owner:
if mode is not None:
chmod(dirpath, mode)
if owner is not None:
chowner(dirpath, owner)
def chowner(path, owner):
if not os.path.exists(path):
logger.error("Path does not exist: {0}".format(path))
else:
owner_info = pwd.getpwnam(owner)
os.chown(path, owner_info[2], owner_info[3])
def chmod(path, mode):
if not os.path.exists(path):
logger.error("Path does not exist: {0}".format(path))
else:
os.chmod(path, mode)
def rm_files(*args):
for paths in args:
# find all possible file paths
for path in glob.glob(paths):
if os.path.isfile(path):
os.remove(path)
def rm_dirs(*args):
"""
Remove the contents of each directory
"""
for p in args:
if not os.path.isdir(p):
continue
for pp in os.listdir(p):
path = os.path.join(p, pp)
if os.path.isfile(path):
os.remove(path)
elif os.path.islink(path):
os.unlink(path)
elif os.path.isdir(path):
shutil.rmtree(path)
def trim_ext(path, ext):
if not ext.startswith("."):
ext = "." + ext
return path.split(ext)[0] if path.endswith(ext) else path
def update_conf_file(path, line_start, val, chk_err=False):
conf = []
if not os.path.isfile(path) and chk_err:
raise IOError("Can't find config file:{0}".format(path))
conf = read_file(path).split('\n')
conf = [x for x in conf
if x is not None and len(x) > 0 and not x.startswith(line_start)]
conf.append(val)
write_file(path, '\n'.join(conf) + '\n')
def search_file(target_dir_name, target_file_name):
for root, dirs, files in os.walk(target_dir_name): # pylint: disable=W0612
for file_name in files:
if file_name == target_file_name:
return os.path.join(root, file_name)
return None
def chmod_tree(path, mode):
for root, dirs, files in os.walk(path): # pylint: disable=W0612
for file_name in files:
os.chmod(os.path.join(root, file_name), mode)
def findstr_in_file(file_path, line_str):
"""
Return True if the line is in the file; False otherwise.
(Trailing whitespace is ignored.)
"""
try:
with open(file_path, 'r') as fh:
for line in fh.readlines():
if line_str == line.rstrip():
return True
except Exception:
# swallow exception
pass
return False
def findre_in_file(file_path, line_re):
"""
Return match object if found in file.
"""
try:
with open(file_path, 'r') as fh:
pattern = re.compile(line_re)
for line in fh.readlines():
match = re.search(pattern, line)
if match:
return match
except: # pylint: disable=W0702
pass
return None
def get_all_files(root_path):
"""
Find all files under the given root path
"""
result = []
for root, dirs, files in os.walk(root_path): # pylint: disable=W0612
result.extend([os.path.join(root, file) for file in files]) # pylint: disable=redefined-builtin
return result
def clean_ioerror(e, paths=None):
"""
Clean-up possibly bad files and directories after an IO error.
The code ignores *all* errors since disk state may be unhealthy.
"""
if paths is None:
paths = []
if isinstance(e, IOError) and e.errno in KNOWN_IOERRORS:
for path in paths:
if path is None:
continue
try:
if os.path.isdir(path):
shutil.rmtree(path, ignore_errors=True)
else:
os.remove(path)
except Exception:
# swallow exception
pass
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/flexible_version.py 0000664 0000000 0000000 00000017536 14626177470 0027332 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from distutils import version # pylint: disable=no-name-in-module
import re
class FlexibleVersion(version.Version):
"""
A more flexible implementation of distutils.version.StrictVersion
The implementation allows one to specify:
- an arbitrary number of version numbers:
not only '1.2.3' , but also '1.2.3.4.5'
- the separator between version numbers:
'1-2-3' is allowed when '-' is specified as separator
- a flexible pre-release separator:
'1.2.3.alpha1', '1.2.3-alpha1', and '1.2.3alpha1' are considered equivalent
- an arbitrary ordering of pre-release tags:
1.1alpha3 < 1.1beta2 < 1.1rc1 < 1.1
when ["alpha", "beta", "rc"] is specified as pre-release tag list
Inspiration from this discussion at StackOverflow:
http://stackoverflow.com/questions/12255554/sort-versions-in-python
"""
def __init__(self, vstring=None, sep='.', prerel_tags=('alpha', 'beta', 'rc')):
version.Version.__init__(self)
if sep is None:
sep = '.'
if prerel_tags is None:
prerel_tags = ()
self.sep = sep
self.prerel_sep = ''
self.prerel_tags = tuple(prerel_tags) if prerel_tags is not None else ()
self._compile_pattern()
self.prerelease = None
self.version = ()
if vstring:
self._parse(str(vstring))
return
_nn_version = 'version'
_nn_prerel_sep = 'prerel_sep'
_nn_prerel_tag = 'tag'
_nn_prerel_num = 'tag_num'
_re_prerel_sep = r'(?P<{pn}>{sep})?'.format(
pn=_nn_prerel_sep,
sep='|'.join(map(re.escape, ('.', '-'))))
@property
def major(self):
return self.version[0] if len(self.version) > 0 else 0
@property
def minor(self):
return self.version[1] if len(self.version) > 1 else 0
@property
def patch(self):
return self.version[2] if len(self.version) > 2 else 0
def _parse(self, vstring):
m = self.version_re.match(vstring)
if not m:
raise ValueError("Invalid version number '{0}'".format(vstring))
self.prerelease = None
self.version = ()
self.prerel_sep = m.group(self._nn_prerel_sep)
tag = m.group(self._nn_prerel_tag)
tag_num = m.group(self._nn_prerel_num)
if tag is not None and tag_num is not None:
self.prerelease = (tag, int(tag_num) if len(tag_num) else None)
self.version = tuple(map(int, self.sep_re.split(m.group(self._nn_version))))
return
def __add__(self, increment):
version = list(self.version) # pylint: disable=W0621
version[-1] += increment
vstring = self._assemble(version, self.sep, self.prerel_sep, self.prerelease)
return FlexibleVersion(vstring=vstring, sep=self.sep, prerel_tags=self.prerel_tags)
def __sub__(self, decrement):
version = list(self.version) # pylint: disable=W0621
if version[-1] <= 0:
raise ArithmeticError("Cannot decrement final numeric component of {0} below zero" \
.format(self))
version[-1] -= decrement
vstring = self._assemble(version, self.sep, self.prerel_sep, self.prerelease)
return FlexibleVersion(vstring=vstring, sep=self.sep, prerel_tags=self.prerel_tags)
def __repr__(self):
return "{cls} ('{vstring}', '{sep}', {prerel_tags})"\
.format(
cls=self.__class__.__name__,
vstring=str(self),
sep=self.sep,
prerel_tags=self.prerel_tags)
def __str__(self):
return self._assemble(self.version, self.sep, self.prerel_sep, self.prerelease)
def __ge__(self, that):
return not self.__lt__(that)
def __gt__(self, that):
return (not self.__lt__(that)) and (not self.__eq__(that))
def __le__(self, that):
return (self.__lt__(that)) or (self.__eq__(that))
def __lt__(self, that):
this_version, that_version = self._ensure_compatible(that)
if this_version != that_version \
or self.prerelease is None and that.prerelease is None:
return this_version < that_version
if self.prerelease is not None and that.prerelease is None:
return True
if self.prerelease is None and that.prerelease is not None:
return False
this_index = self.prerel_tags_set[self.prerelease[0]]
that_index = self.prerel_tags_set[that.prerelease[0]]
if this_index == that_index:
return self.prerelease[1] < that.prerelease[1]
return this_index < that_index
def __ne__(self, that):
return not self.__eq__(that)
def __eq__(self, that):
this_version, that_version = self._ensure_compatible(that)
if this_version != that_version:
return False
if self.prerelease != that.prerelease:
return False
return True
def matches(self, that):
if self.sep != that.sep or len(self.version) > len(that.version):
return False
for i in range(len(self.version)):
if self.version[i] != that.version[i]:
return False
if self.prerel_tags:
return self.prerel_tags == that.prerel_tags
return True
def _assemble(self, version, sep, prerel_sep, prerelease): # pylint: disable=W0621
s = sep.join(map(str, version))
if prerelease is not None:
if prerel_sep is not None:
s += prerel_sep
s += prerelease[0]
if prerelease[1] is not None:
s += str(prerelease[1])
return s
def _compile_pattern(self):
sep, self.sep_re = self._compile_separator(self.sep)
if self.prerel_tags:
tags = '|'.join(re.escape(tag) for tag in self.prerel_tags)
self.prerel_tags_set = dict(zip(self.prerel_tags, range(len(self.prerel_tags))))
release_re = '(?:{prerel_sep}(?P<{tn}>{tags})(?P<{nn}>\d*))?'.format( # pylint: disable=W1401
prerel_sep=self._re_prerel_sep,
tags=tags,
tn=self._nn_prerel_tag,
nn=self._nn_prerel_num)
else:
release_re = ''
version_re = r'^(?P<{vn}>\d+(?:(?:{sep}\d+)*)?){rel}$'.format(
vn=self._nn_version,
sep=sep,
rel=release_re)
self.version_re = re.compile(version_re)
return
def _compile_separator(self, sep):
if sep is None:
return '', re.compile('')
return re.escape(sep), re.compile(re.escape(sep))
def _ensure_compatible(self, that):
"""
Ensures the instances have the same structure and, if so, returns length compatible
version lists (so that x.y.0.0 is equivalent to x.y).
"""
if self.prerel_tags != that.prerel_tags or self.sep != that.sep:
raise ValueError("Unable to compare: versions have different structures")
this_version = list(self.version[:])
that_version = list(that.version[:])
while len(this_version) < len(that_version): this_version.append(0)
while len(that_version) < len(this_version): that_version.append(0)
return this_version, that_version
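# Illustrative ordering and arithmetic (not part of the module):
#
#     FlexibleVersion("1.1alpha3") < FlexibleVersion("1.1beta2") < FlexibleVersion("1.1rc1") < FlexibleVersion("1.1")
#     FlexibleVersion("1.2") == FlexibleVersion("1.2.0.0")    # shorter versions are padded with zeros
#     FlexibleVersion("1.2.3") + 1 == FlexibleVersion("1.2.4")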
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/networkutil.py 0000664 0000000 0000000 00000027242 14626177470 0026355 0 ustar 00root root 0000000 0000000 #
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.utils.shellutil import CommandError
class RouteEntry(object):
"""
Represents a single route. The destination, gateway, and mask members are hex representations of the IPv4 address in
network byte order.
"""
def __init__(self, interface, destination, gateway, mask, flags, metric):
self.interface = interface
self.destination = destination
self.gateway = gateway
self.mask = mask
self.flags = int(flags, 16)
self.metric = int(metric)
@staticmethod
def _net_hex_to_dotted_quad(value):
if len(value) != 8:
raise Exception("String to dotted quad conversion must be 8 characters")
octets = []
for idx in range(6, -2, -2):
octets.append(str(int(value[idx:idx + 2], 16)))
return ".".join(octets)
def destination_quad(self):
return self._net_hex_to_dotted_quad(self.destination)
def gateway_quad(self):
return self._net_hex_to_dotted_quad(self.gateway)
def mask_quad(self):
return self._net_hex_to_dotted_quad(self.mask)
def to_json(self):
f = '{{"Iface": "{0}", "Destination": "{1}", "Gateway": "{2}", "Mask": "{3}", "Flags": "{4:#06x}", "Metric": "{5}"}}'
return f.format(self.interface, self.destination_quad(), self.gateway_quad(), self.mask_quad(),
self.flags, self.metric)
def __str__(self):
f = "Iface: {0}\tDestination: {1}\tGateway: {2}\tMask: {3}\tFlags: {4:#06x}\tMetric: {5}"
return f.format(self.interface, self.destination_quad(), self.gateway_quad(), self.mask_quad(),
self.flags, self.metric)
def __repr__(self):
return 'RouteEntry("{0}", "{1}", "{2}", "{3}", "{4:#04x}", "{5}")' \
.format(self.interface, self.destination, self.gateway, self.mask, self.flags, self.metric)
class NetworkInterfaceCard:
def __init__(self, name, link_info):
self.name = name
self.ipv4 = set()
self.ipv6 = set()
self.link = link_info
def add_ipv4(self, info):
self.ipv4.add(info)
def add_ipv6(self, info):
self.ipv6.add(info)
def __eq__(self, other):
return self.link == other.link and \
self.ipv4 == other.ipv4 and \
self.ipv6 == other.ipv6
@staticmethod
def _json_array(items):
return "[{0}]".format(",".join(['"{0}"'.format(x) for x in sorted(items)]))
def __str__(self):
entries = ['"name": "{0}"'.format(self.name),
'"link": "{0}"'.format(self.link)]
if len(self.ipv4) > 0:
entries.append('"ipv4": {0}'.format(self._json_array(self.ipv4)))
if len(self.ipv6) > 0:
entries.append('"ipv6": {0}'.format(self._json_array(self.ipv6)))
return "{{ {0} }}".format(", ".join(entries))
class FirewallCmdDirectCommands(object):
# firewall-cmd --direct --permanent --passthrough ipv4 -t security -A OUTPUT -d 1.2.3.5 -p tcp -m owner --uid-owner 999 -j ACCEPT
# success
# adds the firewalld rule and returns the status
PassThrough = "--passthrough"
# firewall-cmd --direct --query-passthrough ipv4 -t security -A OUTPUT -d 1.2.3.5 -p tcp -m owner --uid-owner 9999 -j ACCEPT
# yes
# firewall-cmd --direct --permanent --query-passthrough ipv4 -t security -A OUTPUT -d 1.2.3.5 -p tcp -m owner --uid-owner 999 -j ACCEPT
# no
# checks if the firewalld rule is present or not
QueryPassThrough = "--query-passthrough"
# firewall-cmd --permanent --direct --remove-passthrough ipv4 -t security -A OUTPUT -d 168.63.129.16 -p tcp -m owner --uid-owner 0 -j ACCEPT
# success
# remove the firewalld rule
RemovePassThrough = "--remove-passthrough"
class AddFirewallRules(object):
"""
This is a utility class meant only to orchestrate adding firewall rules (both iptables and firewalld).
It is also called from a separate utility binary that runs very early in the boot order of the VM,
at which point it does not have access to basic mounts like the file system.
Please make sure not to log anything in any function of this class.
"""
# -A adds the rule to the end of the iptable chain
APPEND_COMMAND = "-A"
# -I inserts the rule at the index specified. If no number specified the rules get added to the top of the chain
# iptables -t security -I OUTPUT 1 -d 168.63.129.16 -p tcp --destination-port 53 -j ACCEPT -w and
# iptables -t security -I OUTPUT -d 168.63.129.16 -p tcp --destination-port 53 -j ACCEPT -w both adds the rule as the first rule of the chain
INSERT_COMMAND = "-I"
# -D deletes the specific rule in the iptable chain
DELETE_COMMAND = "-D"
# -C checks if a specific rule exists
CHECK_COMMAND = "-C"
@staticmethod
def __get_iptables_base_command(wait=""):
"""
If 'wait' is a non-empty string, adds the wait option (-w) to the iptables command line.
"""
if wait != "":
return ["iptables", "-w"]
return ["iptables"]
@staticmethod
def __get_firewalld_base_command(command):
# For more documentation - https://firewalld.org/documentation/man-pages/firewall-cmd.html
return ["firewall-cmd", "--permanent", "--direct", command, "ipv4"]
@staticmethod
def __get_common_command_params(command, destination):
return ["-t", "security", command, "OUTPUT", "-d", destination, "-p", "tcp"]
@staticmethod
def __get_firewall_base_command(command, destination, firewalld_command="", wait=""):
# Firewalld.service fails if we set `-w` in the iptables command, so not adding it at all for firewalld commands
if firewalld_command != "":
cmd = AddFirewallRules.__get_firewalld_base_command(firewalld_command)
else:
cmd = AddFirewallRules.__get_iptables_base_command(wait)
cmd.extend(AddFirewallRules.__get_common_command_params(command, destination))
return cmd
@staticmethod
def get_accept_tcp_rule(command, destination, firewalld_command="", wait=""):
# This rule allows DNS TCP request to wireserver ip for non root users
cmd = AddFirewallRules.__get_firewall_base_command(command, destination, firewalld_command, wait)
cmd.extend(['--destination-port', '53', '-j', 'ACCEPT'])
return cmd
@staticmethod
def get_wire_root_accept_rule(command, destination, owner_uid, firewalld_command="", wait=""):
cmd = AddFirewallRules.__get_firewall_base_command(command, destination, firewalld_command, wait)
cmd.extend(["-m", "owner", "--uid-owner", str(owner_uid), "-j", "ACCEPT"])
return cmd
@staticmethod
def get_wire_non_root_drop_rule(command, destination, firewalld_command="", wait=""):
cmd = AddFirewallRules.__get_firewall_base_command(command, destination, firewalld_command, wait)
cmd.extend(["-m", "conntrack", "--ctstate", "INVALID,NEW", "-j", "DROP"])
return cmd
@staticmethod
def __raise_if_empty(val, name):
if val == "":
raise Exception("{0} should not be empty".format(name))
@staticmethod
def __execute_cmd(cmd):
try:
shellutil.run_command(cmd)
except CommandError as error:
msg = "Command {0} failed with exit-code: {1}\nStdout: {2}\nStderr: {3}".format(' '.join(cmd),
error.returncode,
ustr(error.stdout),
ustr(error.stderr))
raise Exception(msg)
@staticmethod
def __execute_check_command(cmd):
# Here we primarily check if an iptables rule exists: True if it exists, False if not.
try:
shellutil.run_command(cmd)
return True
except CommandError as err:
# Return code 1 is expected while using the check command. Raise if we encounter any other return code.
if err.returncode != 1:
raise
return False
@staticmethod
def get_missing_iptables_rules(wait, dst_ip, uid):
missing = []
check_cmd_tcp_rule = AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_tcp_rule):
missing.append("ACCEPT DNS")
check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid, wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_accept_rule):
missing.append("ACCEPT")
check_cmd_drop_rule = AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_drop_rule):
missing.append("DROP")
return missing
@staticmethod
def __execute_firewall_commands(dst_ip, uid, command=APPEND_COMMAND, firewalld_command="", wait=""):
# The order in which the below rules are added matters for the ip table rules to work as expected
AddFirewallRules.__raise_if_empty(dst_ip, "Destination IP")
AddFirewallRules.__raise_if_empty(uid, "User ID")
accept_tcp_rule = AddFirewallRules.get_accept_tcp_rule(command, dst_ip,
firewalld_command=firewalld_command, wait=wait)
AddFirewallRules.__execute_cmd(accept_tcp_rule)
accept_cmd = AddFirewallRules.get_wire_root_accept_rule(command, dst_ip, uid,
firewalld_command=firewalld_command, wait=wait)
AddFirewallRules.__execute_cmd(accept_cmd)
drop_cmd = AddFirewallRules.get_wire_non_root_drop_rule(command, dst_ip, firewalld_command=firewalld_command, wait=wait)
AddFirewallRules.__execute_cmd(drop_cmd)
@staticmethod
def add_iptables_rules(wait, dst_ip, uid):
AddFirewallRules.__execute_firewall_commands(dst_ip, uid, command=AddFirewallRules.APPEND_COMMAND, wait=wait)
@staticmethod
def add_firewalld_rules(dst_ip, uid):
# Firewalld.service fails if we set `-w` in the iptables command, so not adding it at all for firewalld commands
# Firewalld.service with the "--permanent --passthrough" parameter ensures that a firewall rule is set only once even if command is executed multiple times
AddFirewallRules.__execute_firewall_commands(dst_ip, uid, firewalld_command=FirewallCmdDirectCommands.PassThrough)
@staticmethod
def check_firewalld_rule_applied(dst_ip, uid):
AddFirewallRules.__execute_firewall_commands(dst_ip, uid, firewalld_command=FirewallCmdDirectCommands.QueryPassThrough)
@staticmethod
def remove_firewalld_rules(dst_ip, uid):
AddFirewallRules.__execute_firewall_commands(dst_ip, uid, firewalld_command=FirewallCmdDirectCommands.RemovePassThrough)
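# Illustrative usage sketch (not part of the module; requires root and the iptables binary,
# and the wire server IP and agent UID below are example values):
#
#     AddFirewallRules.add_iptables_rules(wait="-w", dst_ip="168.63.129.16", uid=0)
#     AddFirewallRules.get_missing_iptables_rules(wait="-w", dst_ip="168.63.129.16", uid=0)   # [] when all present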
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/restutil.py 0000664 0000000 0000000 00000053715 14626177470 0025645 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import re
import threading
import time
import socket
import struct
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.textutil as textutil
from azurelinuxagent.common.exception import HttpError, ResourceGoneError, InvalidContainerError
from azurelinuxagent.common.future import httpclient, urlparse, ustr
from azurelinuxagent.common.version import PY_VERSION_MAJOR, AGENT_NAME, GOAL_STATE_AGENT_VERSION
SECURE_WARNING_EMITTED = False
DEFAULT_RETRIES = 6
DELAY_IN_SECONDS = 1
THROTTLE_RETRIES = 25
THROTTLE_DELAY_IN_SECONDS = 1
REDACTED_TEXT = ""
SAS_TOKEN_RETRIEVAL_REGEX = re.compile(r'^(https?://[a-zA-Z0-9.].*sig=)([a-zA-Z0-9%-]*)(.*)$')
RETRY_CODES = [
httpclient.RESET_CONTENT,
httpclient.PARTIAL_CONTENT,
httpclient.FORBIDDEN,
httpclient.INTERNAL_SERVER_ERROR,
httpclient.NOT_IMPLEMENTED,
httpclient.BAD_GATEWAY,
httpclient.SERVICE_UNAVAILABLE,
httpclient.GATEWAY_TIMEOUT,
httpclient.INSUFFICIENT_STORAGE,
429, # Request Rate Limit Exceeded
]
#
# Currently the HostGAPlugin has an issue in its cache that may produce a BAD_REQUEST failure for valid URIs when using the extensionArtifact API.
# Add this status to the retryable codes, but use it only when requesting downloads via the HostGAPlugin. The retry logic in the download code
# would give enough time to the HGAP to refresh its cache. Once the fix to address that issue is deployed, consider removing the use of
# HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES.
#
HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES = RETRY_CODES[:] # make a copy of RETRY_CODES
HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES.append(httpclient.BAD_REQUEST)
RESOURCE_GONE_CODES = [
httpclient.GONE
]
OK_CODES = [
httpclient.OK,
httpclient.CREATED,
httpclient.ACCEPTED
]
NOT_MODIFIED_CODES = [
httpclient.NOT_MODIFIED
]
HOSTPLUGIN_UPSTREAM_FAILURE_CODES = [
502
]
THROTTLE_CODES = [
httpclient.FORBIDDEN,
httpclient.SERVICE_UNAVAILABLE,
429, # Request Rate Limit Exceeded
]
RETRY_EXCEPTIONS = [
httpclient.NotConnected,
httpclient.IncompleteRead,
httpclient.ImproperConnectionState,
httpclient.BadStatusLine
]
# http://www.gnu.org/software/wget/manual/html_node/Proxies.html
HTTP_PROXY_ENV = "http_proxy"
HTTPS_PROXY_ENV = "https_proxy"
NO_PROXY_ENV = "no_proxy"
HTTP_USER_AGENT = "{0}/{1}".format(AGENT_NAME, GOAL_STATE_AGENT_VERSION)
HTTP_USER_AGENT_HEALTH = "{0}+health".format(HTTP_USER_AGENT)
INVALID_CONTAINER_CONFIGURATION = "InvalidContainerConfiguration"
REQUEST_ROLE_CONFIG_FILE_NOT_FOUND = "RequestRoleConfigFileNotFound"
KNOWN_WIRESERVER_IP = '168.63.129.16'
HOST_PLUGIN_PORT = 32526
class IOErrorCounter(object):
_lock = threading.RLock()
_protocol_endpoint = KNOWN_WIRESERVER_IP
_counts = {"hostplugin":0, "protocol":0, "other":0}
@staticmethod
def increment(host=None, port=None):
with IOErrorCounter._lock:
if host == IOErrorCounter._protocol_endpoint:
if port == HOST_PLUGIN_PORT:
IOErrorCounter._counts["hostplugin"] += 1
else:
IOErrorCounter._counts["protocol"] += 1
else:
IOErrorCounter._counts["other"] += 1
@staticmethod
def get_and_reset():
with IOErrorCounter._lock:
counts = IOErrorCounter._counts.copy()
IOErrorCounter.reset()
return counts
@staticmethod
def reset():
with IOErrorCounter._lock:
IOErrorCounter._counts = {"hostplugin":0, "protocol":0, "other":0}
@staticmethod
def set_protocol_endpoint(endpoint=KNOWN_WIRESERVER_IP):
IOErrorCounter._protocol_endpoint = endpoint
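# Illustrative usage (not part of the module; counts assume no prior increments):
#
#     IOErrorCounter.increment(host=KNOWN_WIRESERVER_IP, port=HOST_PLUGIN_PORT)
#     IOErrorCounter.get_and_reset()   # {'hostplugin': 1, 'protocol': 0, 'other': 0}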
def _compute_delay(retry_attempt=1, delay=DELAY_IN_SECONDS):
fib = (1, 1)
for _ in range(retry_attempt):
fib = (fib[1], fib[0]+fib[1])
return delay*fib[1]
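# Illustrative values (not part of the module): with the default 1-second delay the backoff
# follows the Fibonacci series.
#
#     _compute_delay(retry_attempt=1)   # 2
#     _compute_delay(retry_attempt=2)   # 3
#     _compute_delay(retry_attempt=3)   # 5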
def _is_retry_status(status, retry_codes=None):
if retry_codes is None:
retry_codes = RETRY_CODES
return status in retry_codes
def _is_retry_exception(e):
return len([x for x in RETRY_EXCEPTIONS if isinstance(e, x)]) > 0
def _is_throttle_status(status):
return status in THROTTLE_CODES
def _parse_url(url):
"""
Parse a URL into its components, broken down to hostname, port, secure (True for https), and relative URI.
:rtype: string, int, bool, string
"""
o = urlparse(url)
rel_uri = o.path
if o.fragment:
rel_uri = "{0}#{1}".format(rel_uri, o.fragment)
if o.query:
rel_uri = "{0}?{1}".format(rel_uri, o.query)
secure = False
if o.scheme.lower() == "https":
secure = True
return o.hostname, o.port, secure, rel_uri
def _trim_url_parameters(url):
"""
Parse URL and return scheme://hostname:port/path
"""
o = urlparse(url)
if o.hostname:
if o.port:
return "{0}://{1}:{2}{3}".format(o.scheme, o.hostname, o.port, o.path)
else:
return "{0}://{1}{2}".format(o.scheme, o.hostname, o.path)
return url
def is_valid_cidr(string_network):
"""
Very simple check of the cidr format in no_proxy variable.
:rtype: bool
"""
if string_network.count('/') == 1:
try:
mask = int(string_network.split('/')[1])
except ValueError:
return False
if mask < 1 or mask > 32:
return False
try:
socket.inet_aton(string_network.split('/')[0])
except socket.error:
return False
else:
return False
return True
def dotted_netmask(mask):
"""Converts mask from /xx format to xxx.xxx.xxx.xxx
Example: if mask is 24 function returns 255.255.255.0
:rtype: str
"""
bits = 0xffffffff ^ (1 << 32 - mask) - 1
return socket.inet_ntoa(struct.pack('>I', bits))
def address_in_network(ip, net):
"""This function allows you to check if an IP belongs to a network subnet
Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
returns False if ip = 192.168.1.1 and net = 192.168.100.0/24
:rtype: bool
"""
ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
netaddr, bits = net.split('/')
netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
return (ipaddr & netmask) == (network & netmask)
def is_ipv4_address(string_ip):
"""
:rtype: bool
"""
try:
socket.inet_aton(string_ip)
except socket.error:
return False
return True
def get_no_proxy():
no_proxy = os.environ.get(NO_PROXY_ENV) or os.environ.get(NO_PROXY_ENV.upper())
if no_proxy:
no_proxy = [host for host in no_proxy.replace(' ', '').split(',') if host]
# no_proxy in the proxies argument takes precedence
return no_proxy
def bypass_proxy(host):
no_proxy = get_no_proxy()
if no_proxy:
if is_ipv4_address(host):
for proxy_ip in no_proxy:
if is_valid_cidr(proxy_ip):
if address_in_network(host, proxy_ip):
return True
elif host == proxy_ip:
# If the no_proxy entry was defined in plain IP notation instead of CIDR notation
# and matches the IP of the host
return True
else:
for proxy_domain in no_proxy:
if host.lower().endswith(proxy_domain.lower()):
# The URL does match something in no_proxy, so we don't want
# to apply the proxies on this URL.
return True
return False
def _get_http_proxy(secure=False):
# Prefer the configuration settings over environment variables
host = conf.get_httpproxy_host()
port = None
if host is not None:
port = conf.get_httpproxy_port()
else:
http_proxy_env = HTTPS_PROXY_ENV if secure else HTTP_PROXY_ENV
http_proxy_url = None
for v in [http_proxy_env, http_proxy_env.upper()]:
if v in os.environ:
http_proxy_url = os.environ[v]
break
if http_proxy_url is not None:
host, port, _, _ = _parse_url(http_proxy_url)
return host, port
def redact_sas_tokens_in_urls(url):
return SAS_TOKEN_RETRIEVAL_REGEX.sub(r"\1" + REDACTED_TEXT + r"\3", url)
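# Illustrative example (not part of the module; the URL is made up):
#
#     redact_sas_tokens_in_urls("https://example.blob.core.windows.net/c/b?sp=r&sig=abc%2F123&se=2020")
#     # -> 'https://example.blob.core.windows.net/c/b?sp=r&sig=<redacted>&se=2020'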
def _http_request(method, host, rel_uri, timeout, port=None, data=None, secure=False,
headers=None, proxy_host=None, proxy_port=None, redact_data=False):
headers = {} if headers is None else headers
headers['Connection'] = 'close'
use_proxy = proxy_host is not None and proxy_port is not None
if port is None:
port = 443 if secure else 80
if 'User-Agent' not in headers:
headers['User-Agent'] = HTTP_USER_AGENT
if use_proxy:
conn_host, conn_port = proxy_host, proxy_port
scheme = "https" if secure else "http"
url = "{0}://{1}:{2}{3}".format(scheme, host, port, rel_uri)
else:
conn_host, conn_port = host, port
url = rel_uri
if secure:
conn = httpclient.HTTPSConnection(conn_host,
conn_port,
timeout=timeout)
if use_proxy:
conn.set_tunnel(host, port)
else:
conn = httpclient.HTTPConnection(conn_host,
conn_port,
timeout=timeout)
payload = data
if redact_data:
payload = "[REDACTED]"
# Logger requires the msg to be a ustr to log properly, ensuring that the data string that we log is always ustr
logger.verbose("HTTP connection [{0}] [{1}] [{2}] [{3}]",
method,
redact_sas_tokens_in_urls(url),
textutil.str_to_encoded_ustr(payload),
headers)
conn.request(method=method, url=url, body=data, headers=headers)
return conn.getresponse()
def http_request(method,
url, data, timeout,
headers=None,
use_proxy=False,
max_retry=None,
retry_codes=None,
retry_delay=DELAY_IN_SECONDS,
redact_data=False,
return_raw_response=False):
"""
NOTE: This method provides some logic to handle errors in the HTTP request, including checking the HTTP status of the response
and handling some exceptions. If return_raw_response is set to True all the error handling will be skipped and the
method will return the actual HTTP response and bubble up any exceptions while issuing the request. Also note that if
return_raw_response is True no retries will be done.
"""
if max_retry is None:
max_retry = DEFAULT_RETRIES
if retry_codes is None:
retry_codes = RETRY_CODES
global SECURE_WARNING_EMITTED # pylint: disable=W0603
host, port, secure, rel_uri = _parse_url(url)
# Use the HTTP(S) proxy
proxy_host, proxy_port = (None, None)
if use_proxy and not bypass_proxy(host):
proxy_host, proxy_port = _get_http_proxy(secure=secure)
if proxy_host or proxy_port:
logger.verbose("HTTP proxy: [{0}:{1}]", proxy_host, proxy_port)
# If httplib module is not built with ssl support,
# fallback to HTTP if allowed
if secure and not hasattr(httpclient, "HTTPSConnection"):
if not conf.get_allow_http():
raise HttpError("HTTPS is unavailable and required")
secure = False
if not SECURE_WARNING_EMITTED:
logger.warn("Python does not include SSL support")
SECURE_WARNING_EMITTED = True
# If httplib module doesn't support HTTPS tunnelling,
# fallback to HTTP if allowed
if secure and \
proxy_host is not None and \
proxy_port is not None \
and not hasattr(httpclient.HTTPSConnection, "set_tunnel"):
if not conf.get_allow_http():
raise HttpError("HTTPS tunnelling is unavailable and required")
secure = False
if not SECURE_WARNING_EMITTED:
logger.warn("Python does not support HTTPS tunnelling")
SECURE_WARNING_EMITTED = True
msg = ''
attempt = 0
delay = 0
was_throttled = False
while attempt < max_retry:
if attempt > 0:
# Compute the request delay
# -- Use a fixed delay if the server ever rate-throttles the request
# (with a safe, minimum number of retry attempts)
# -- Otherwise, compute a delay that is the product of the next
# item in the Fibonacci series and the initial delay value
delay = THROTTLE_DELAY_IN_SECONDS \
if was_throttled \
else _compute_delay(retry_attempt=attempt,
delay=retry_delay)
logger.verbose("[HTTP Retry] "
"Attempt {0} of {1} will delay {2} seconds: {3}",
attempt+1,
max_retry,
delay,
msg)
time.sleep(delay)
attempt += 1
try:
resp = _http_request(method,
host,
rel_uri,
timeout,
port=port,
data=data,
secure=secure,
headers=headers,
proxy_host=proxy_host,
proxy_port=proxy_port,
redact_data=redact_data)
logger.verbose("[HTTP Response] Status Code {0}", resp.status)
if return_raw_response: # skip all error handling
return resp
if request_failed(resp):
if _is_retry_status(resp.status, retry_codes=retry_codes):
msg = '[HTTP Retry] {0} {1} -- Status Code {2}'.format(method, url, resp.status)
# Note if throttled and ensure a safe, minimum number of
# retry attempts
if _is_throttle_status(resp.status):
was_throttled = True
max_retry = max(max_retry, THROTTLE_RETRIES)
continue
# If we got a 410 (resource gone) for any reason, raise an exception. The caller will handle it by
# forcing a goal state refresh and retrying the call.
if resp.status in RESOURCE_GONE_CODES:
response_error = read_response_error(resp)
raise ResourceGoneError(response_error)
# If we got a 400 (bad request) because the container id is invalid, it could indicate a stale goal
# state. The caller will handle this exception by forcing a goal state refresh and retrying the call.
if resp.status == httpclient.BAD_REQUEST:
response_error = read_response_error(resp)
if INVALID_CONTAINER_CONFIGURATION in response_error:
raise InvalidContainerError(response_error)
return resp
except httpclient.HTTPException as e:
if return_raw_response: # skip all error handling
raise
clean_url = _trim_url_parameters(url)
msg = '[HTTP Failed] {0} {1} -- HttpException {2}'.format(method, clean_url, e)
if _is_retry_exception(e):
continue
break
except IOError as e:
if return_raw_response: # skip all error handling
raise
IOErrorCounter.increment(host=host, port=port)
clean_url = _trim_url_parameters(url)
msg = '[HTTP Failed] {0} {1} -- IOError {2}'.format(method, clean_url, e)
continue
raise HttpError("{0} -- {1} attempts made".format(msg, attempt))
def http_get(url,
headers=None,
use_proxy=False,
max_retry=None,
retry_codes=None,
retry_delay=DELAY_IN_SECONDS,
return_raw_response=False,
timeout=10):
"""
NOTE: This method provides some logic to handle errors in the HTTP request, including checking the HTTP status of the response
and handling some exceptions. If return_raw_response is set to True, all error handling is skipped and the
method returns the actual HTTP response, bubbling up any exceptions raised while issuing the request. Also note that if
return_raw_response is True, no retries are done.
"""
if max_retry is None:
max_retry = DEFAULT_RETRIES
if retry_codes is None:
retry_codes = RETRY_CODES
return http_request("GET",
url, None, timeout,
headers=headers,
use_proxy=use_proxy,
max_retry=max_retry,
retry_codes=retry_codes,
retry_delay=retry_delay,
return_raw_response=return_raw_response)
def http_head(url,
headers=None,
use_proxy=False,
max_retry=None,
retry_codes=None,
retry_delay=DELAY_IN_SECONDS,
timeout=10):
if max_retry is None:
max_retry = DEFAULT_RETRIES
if retry_codes is None:
retry_codes = RETRY_CODES
return http_request("HEAD",
url, None, timeout,
headers=headers,
use_proxy=use_proxy,
max_retry=max_retry,
retry_codes=retry_codes,
retry_delay=retry_delay)
def http_post(url,
data,
headers=None,
use_proxy=False,
max_retry=None,
retry_codes=None,
retry_delay=DELAY_IN_SECONDS,
timeout=10):
if max_retry is None:
max_retry = DEFAULT_RETRIES
if retry_codes is None:
retry_codes = RETRY_CODES
return http_request("POST",
url, data, timeout,
headers=headers,
use_proxy=use_proxy,
max_retry=max_retry,
retry_codes=retry_codes,
retry_delay=retry_delay)
def http_put(url,
data,
headers=None,
use_proxy=False,
max_retry=None,
retry_codes=None,
retry_delay=DELAY_IN_SECONDS,
redact_data=False,
timeout=10):
if max_retry is None:
max_retry = DEFAULT_RETRIES
if retry_codes is None:
retry_codes = RETRY_CODES
return http_request("PUT",
url, data, timeout,
headers=headers,
use_proxy=use_proxy,
max_retry=max_retry,
retry_codes=retry_codes,
retry_delay=retry_delay,
redact_data=redact_data)
def http_delete(url,
headers=None,
use_proxy=False,
max_retry=None,
retry_codes=None,
retry_delay=DELAY_IN_SECONDS,
timeout=10):
if max_retry is None:
max_retry = DEFAULT_RETRIES
if retry_codes is None:
retry_codes = RETRY_CODES
return http_request("DELETE",
url, None, timeout,
headers=headers,
use_proxy=use_proxy,
max_retry=max_retry,
retry_codes=retry_codes,
retry_delay=retry_delay)
def request_failed(resp, ok_codes=None):
if ok_codes is None:
ok_codes = OK_CODES
return not request_succeeded(resp, ok_codes=ok_codes)
def request_succeeded(resp, ok_codes=None):
if ok_codes is None:
ok_codes = OK_CODES
return resp is not None and resp.status in ok_codes
def request_not_modified(resp):
return resp is not None and resp.status in NOT_MODIFIED_CODES
def request_failed_at_hostplugin(resp, upstream_failure_codes=None):
"""
The host plugin returns 502 for any upstream issue, so a host plugin failure is any 5xx status except 502
"""
if upstream_failure_codes is None:
upstream_failure_codes = HOSTPLUGIN_UPSTREAM_FAILURE_CODES
return resp is not None and resp.status >= 500 and resp.status not in upstream_failure_codes
def read_response_error(resp):
result = ''
if resp is not None:
try:
result = "[HTTP Failed] [{0}: {1}] {2}".format(
resp.status,
resp.reason,
resp.read())
# this result string is passed upstream to several methods
# which do a raise HttpError() or a format() of some kind;
# as a result it must not contain any non-ASCII characters
if PY_VERSION_MAJOR < 3:
result = ustr(result, encoding='ascii', errors='ignore')
else:
result = result\
.encode(encoding='ascii', errors='ignore')\
.decode(encoding='ascii', errors='ignore')
result = textutil.replace_non_ascii(result)
except Exception as e:
logger.warn(textutil.format_exception(e))
return result
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/shellutil.py 0000664 0000000 0000000 00000040112 14626177470 0025762 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import subprocess
import sys
import tempfile
import threading
if sys.version_info[0] == 2:
# TimeoutExpired was introduced in Python 3; define a dummy class for Python 2
class TimeoutExpired(Exception):
pass
else:
from subprocess import TimeoutExpired
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.future import ustr
if not hasattr(subprocess, 'check_output'):
def check_output(*popenargs, **kwargs):
r"""Backport from subprocess module from python 2.7"""
if 'stdout' in kwargs:
raise ValueError('stdout argument not allowed, '
'it will be overridden.')
process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
output, unused_err = process.communicate()
retcode = process.poll()
if retcode:
cmd = kwargs.get("args")
if cmd is None:
cmd = popenargs[0]
raise subprocess.CalledProcessError(retcode, cmd, output=output)
return output
# Exception classes used by this module.
class CalledProcessError(Exception):
def __init__(self, returncode, cmd, output=None): # pylint: disable=W0231
self.returncode = returncode
self.cmd = cmd
self.output = output
def __str__(self):
return ("Command '{0}' returned non-zero exit status {1}"
"").format(self.cmd, self.returncode)
subprocess.check_output = check_output
subprocess.CalledProcessError = CalledProcessError
# pylint: disable=W0105
"""
Shell command util functions
"""
# pylint: enable=W0105
def has_command(cmd):
"""
Return True if the given command executes successfully (exit code 0)
"""
return not run(cmd, False)
def run(cmd, chk_err=True, expected_errors=None):
"""
Note: Deprecated in favour of the `azurelinuxagent.common.utils.shellutil.run_command` function.
Calls run_get_output on 'cmd', returning only the return code.
If chk_err=True then errors will be reported in the log.
If chk_err=False then errors will be suppressed from the log.
"""
if expected_errors is None:
expected_errors = []
retcode, out = run_get_output(cmd, chk_err=chk_err, expected_errors=expected_errors) # pylint: disable=W0612
return retcode
def run_get_output(cmd, chk_err=True, log_cmd=True, expected_errors=None):
"""
Wrapper for subprocess.check_output.
Execute 'cmd'. Returns return code and STDOUT, trapping expected
exceptions.
Logs exceptions at error level if the chk_err parameter is True
For new callers, consider using run_command instead as it separates stdout from stderr,
returns only stdout on success, logs both outputs and return code on error and raises an exception.
"""
if expected_errors is None:
expected_errors = []
if log_cmd:
logger.verbose(u"Command: [{0}]", cmd)
try:
process = _popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
output, _ = process.communicate()
_on_command_completed(process.pid)
output = __encode_command_output(output)
if process.returncode != 0:
if chk_err:
msg = u"Command: [{0}], " \
u"return code: [{1}], " \
u"result: [{2}]".format(cmd, process.returncode, output)
if process.returncode in expected_errors:
logger.info(msg)
else:
logger.error(msg)
return process.returncode, output
except Exception as exception:
if chk_err:
logger.error(u"Command [{0}] raised unexpected exception: [{1}]"
.format(cmd, ustr(exception)))
return -1, ustr(exception)
return 0, output
def __format_command(command):
"""
Formats the command taken by run_command/run_pipe.
Examples:
> __format_command("sort")
'sort'
> __format_command(["sort", "-u"])
'sort -u'
> __format_command([["sort"], ["uniq", "-n"]])
'sort | uniq -n'
"""
if isinstance(command, list):
if command and isinstance(command[0], list):
return " | ".join([" ".join(cmd) for cmd in command])
return " ".join(command)
return command
def __encode_command_output(output):
"""
Encodes the stdout/stderr returned by subprocess.communicate()
"""
return ustr(output if output is not None else b'', encoding='utf-8', errors="backslashreplace")
class CommandError(Exception):
"""
Exception raised by run_command/run_pipe when the command returns an error
"""
@staticmethod
def _get_message(command, return_code, stderr):
command_name = command[0] if isinstance(command, list) and len(command) > 0 else command
return "'{0}' failed: {1} ({2})".format(command_name, return_code, stderr.rstrip())
def __init__(self, command, return_code, stdout, stderr):
super(Exception, self).__init__(CommandError._get_message(command, return_code, stderr)) # pylint: disable=E1003
self.command = command
self.returncode = return_code
self.stdout = stdout
self.stderr = stderr
def __run_command(command_action, command, log_error, encode_output):
"""
Executes the given command_action and returns its stdout. The command_action is a function that executes a command/pipe
and returns its exit code, stdout, and stderr.
If there are any errors executing the command it raises a CommandError; if 'log_error'
is True, it also logs details about the error.
If encode_output is True the stdout is returned as a string, otherwise it is returned as a bytes object.
"""
try:
return_code, stdout, stderr = command_action()
if encode_output:
stdout = __encode_command_output(stdout)
stderr = __encode_command_output(stderr)
if return_code != 0:
if log_error:
logger.error(
"Command: [{0}], return code: [{1}], stdout: [{2}] stderr: [{3}]",
__format_command(command),
return_code,
stdout,
stderr)
raise CommandError(command=__format_command(command), return_code=return_code, stdout=stdout, stderr=stderr)
return stdout
except CommandError:
raise
except Exception as exception:
if log_error:
logger.error(u"Command [{0}] raised unexpected exception: [{1}]", __format_command(command), ustr(exception))
raise
# W0622: Redefining built-in 'input' -- disabled: the parameter name mimics subprocess.communicate()
def run_command(command, input=None, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, log_error=False, encode_input=True, encode_output=True, track_process=True, timeout=None): # pylint:disable=W0622
"""
Executes the given command and returns its stdout.
If there are any errors executing the command it raises a CommandError; if 'log_error'
is True, it also logs details about the error.
If encode_output is True the stdout is returned as a string, otherwise it is returned as a bytes object.
If track_process is False the command is not added to list of running commands
This function is a thin wrapper around Popen/communicate in the subprocess module:
* The 'input' parameter corresponds to the same parameter in communicate
* The 'stdin' parameter corresponds to the same parameters in Popen
* Only one of 'input' and 'stdin' can be specified
* The 'stdout' and 'stderr' parameters correspond to the same parameters in Popen, except that they
default to subprocess.PIPE instead of None
* If the output of the command is redirected using the 'stdout' or 'stderr' parameters (i.e. if the
value for these parameters is anything other than the default (subprocess.PIPE)), then the corresponding
values returned by this function or the CommandError exception will be empty strings.
NOTE: The 'timeout' parameter is ignored on Python 2
NOTE: This is the preferred method to execute shell commands over `azurelinuxagent.common.utils.shellutil.run` function.
"""
if input is not None and stdin is not None:
raise ValueError("The input and stdin arguments are mutually exclusive")
def command_action():
popen_stdin = communicate_input = None
if input is not None:
popen_stdin = subprocess.PIPE
communicate_input = input.encode() if encode_input and isinstance(input, str) else input # communicate() needs an array of bytes
if stdin is not None:
popen_stdin = stdin
communicate_input = None
if track_process:
process = _popen(command, stdin=popen_stdin, stdout=stdout, stderr=stderr, shell=False)
else:
process = subprocess.Popen(command, stdin=popen_stdin, stdout=stdout, stderr=stderr, shell=False)
try:
if sys.version_info[0] == 2: # communicate() doesn't support timeout on Python 2
command_stdout, command_stderr = process.communicate(input=communicate_input)
else:
command_stdout, command_stderr = process.communicate(input=communicate_input, timeout=timeout)
except TimeoutExpired:
if log_error:
logger.error(u"Command [{0}] timed out", __format_command(command))
command_stdout, command_stderr = '', ''
try:
process.kill()
# try to get any output from the command, but ignore any errors if we can't
try:
command_stdout, command_stderr = process.communicate()
# W0702: No exception type(s) specified (bare-except)
except: # pylint: disable=W0702
pass
except Exception as exception:
if log_error:
logger.error(u"Can't terminate timed out process: {0}", ustr(exception))
raise CommandError(command=__format_command(command), return_code=-1, stdout=command_stdout, stderr="command timeout\n{0}".format(command_stderr))
if track_process:
_on_command_completed(process.pid)
return process.returncode, command_stdout, command_stderr
return __run_command(command_action=command_action, command=command, log_error=log_error, encode_output=encode_output)
def run_pipe(pipe, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, log_error=False, encode_output=True):
"""
Executes the given commands as a pipe and returns its stdout as a string.
The pipe is a list of commands, which in turn are a list of strings, e.g.
[["sort"], ["uniq", "-n"]] represents 'sort | unique -n'
If there are any errors executing the command it raises a CommandError; if 'log_error'
is True, it also logs details about the error.
If encode_output is True the stdout is returned as a string, otherwise it is returned as a bytes object.
This function is a thin wrapper around Popen/communicate in the subprocess module:
* The 'stdin' parameter is used as input for the first command in the pipe
* The 'stdout', and 'stderr' can be used to redirect the output of the pipe
* If the output of the pipe is redirected using the 'stdout' or 'stderr' parameters (i.e. if the
value for these parameters is anything other than the default (subprocess.PIPE)), then the corresponding
values returned by this function or the CommandError exception will be empty strings.
"""
if len(pipe) < 2:
raise ValueError("The pipe must consist of at least 2 commands")
def command_action():
stderr_file = None
try:
popen_stdin = stdin
# If stderr is subprocess.PIPE each call to Popen would create a new pipe. We want to collect the stderr of all the
# commands in the pipe so we replace stderr with a temporary file that we read once the pipe completes.
if stderr == subprocess.PIPE:
stderr_file = tempfile.TemporaryFile()
popen_stderr = stderr_file
else:
popen_stderr = stderr
processes = []
i = 0
while i < len(pipe) - 1:
processes.append(_popen(pipe[i], stdin=popen_stdin, stdout=subprocess.PIPE, stderr=popen_stderr))
popen_stdin = processes[i].stdout
i += 1
processes.append(_popen(pipe[i], stdin=popen_stdin, stdout=stdout, stderr=popen_stderr))
i = 0
while i < len(processes) - 1:
processes[i].stdout.close() # see https://docs.python.org/2/library/subprocess.html#replacing-shell-pipeline
i += 1
pipe_stdout, pipe_stderr = processes[i].communicate()
for proc in processes:
_on_command_completed(proc.pid)
if stderr_file is not None:
stderr_file.seek(0)
pipe_stderr = stderr_file.read()
return processes[i].returncode, pipe_stdout, pipe_stderr
finally:
if stderr_file is not None:
stderr_file.close()
return __run_command(command_action=command_action, command=pipe, log_error=log_error, encode_output=encode_output)
def quote(word_list):
"""
Quote a list or tuple of strings for the Unix shell as words, using
single quotes.
The resulting string is safe for use with ``shell=True`` in ``subprocess``
and in ``os.system``: ``assert shlex.split(quote(word_list)) == word_list``.
See POSIX.1:2013 Vol 3, Chap 2, Sec 2.2.2:
http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_02
"""
if not isinstance(word_list, (tuple, list)):
word_list = (word_list,)
return " ".join(list("'{0}'".format(s.replace("'", "'\\''")) for s in word_list))
#
# The run_command/run_pipe/run/run_get_output functions maintain a list of the commands that they are currently executing.
#
#
_running_commands = []
_running_commands_lock = threading.RLock()
PARENT_PROCESS_NAME = "AZURE_GUEST_AGENT_PARENT_PROCESS_NAME"
AZURE_GUEST_AGENT = "AZURE_GUEST_AGENT"
def _popen(*args, **kwargs):
with _running_commands_lock:
# Add the environment variables
env = {}
if 'env' in kwargs:
env.update(kwargs['env'])
else:
env.update(os.environ)
# Set the marker before process start
env[PARENT_PROCESS_NAME] = AZURE_GUEST_AGENT
kwargs['env'] = env
process = subprocess.Popen(*args, **kwargs)
_running_commands.append(process.pid)
return process
def _on_command_completed(pid):
with _running_commands_lock:
_running_commands.remove(pid)
def get_running_commands():
"""
Returns the commands started by run/run_get_output/run_command/run_pipe that are currently running.
NOTE: This function is not synchronized with process completion, so the returned array may include processes that have
already completed. Also, keep in mind that by the time this function returns additional processes may have
started or completed.
"""
with _running_commands_lock:
return _running_commands[:] # return a copy, since the call may originate on another thread
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/textutil.py 0000664 0000000 0000000 00000030350 14626177470 0025642 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import base64
import crypt
import hashlib
import random
import re
import string
import struct
import sys
import traceback
import xml.dom.minidom as minidom
import zlib
from azurelinuxagent.common.future import ustr
def parse_doc(xml_text):
"""
Parse xml document from string
"""
# The minidom lib has some issues with unicode in Python 2.
# Encode the string as UTF-8 first
xml_text = xml_text.encode('utf-8')
return minidom.parseString(xml_text)
def findall(root, tag, namespace=None):
"""
Get all nodes by tag and namespace under Node root.
"""
if root is None:
return []
if namespace is None:
return root.getElementsByTagName(tag)
else:
return root.getElementsByTagNameNS(namespace, tag)
def find(root, tag, namespace=None):
"""
Get first node by tag and namespace under Node root.
"""
nodes = findall(root, tag, namespace=namespace)
if nodes is not None and len(nodes) >= 1:
return nodes[0]
else:
return None
def gettext(node):
"""
Get node text
"""
if node is None:
return None
for child in node.childNodes:
if child.nodeType == child.TEXT_NODE:
return child.data
return None
def findtext(root, tag, namespace=None):
"""
Get text of node by tag and namespace under Node root.
"""
node = find(root, tag, namespace=namespace)
return gettext(node)
def getattrib(node, attr_name):
"""
Get attribute of xml node
"""
if node is not None:
return node.getAttribute(attr_name)
else:
return None
def unpack(buf, offset, value_range):
"""
Unpack bytes into python values.
"""
result = 0
for i in value_range:
result = (result << 8) | str_to_ord(buf[offset + i])
return result
def unpack_little_endian(buf, offset, length):
"""
Unpack little endian bytes into python values.
"""
return unpack(buf, offset, list(range(length - 1, -1, -1)))
def unpack_big_endian(buf, offset, length):
"""
Unpack big endian bytes into python values.
"""
return unpack(buf, offset, list(range(0, length)))
def hex_dump3(buf, offset, length):
"""
Dump range of buf in formatted hex.
"""
return ''.join(['%02X' % str_to_ord(char) for char in buf[offset:offset + length]])
def hex_dump2(buf):
"""
Dump buf in formatted hex.
"""
return hex_dump3(buf, 0, len(buf))
def is_in_range(a, low, high):
"""
Return True if low <= a <= high
"""
return low <= a <= high
def is_printable(ch):
"""
Return True if character is displayable.
"""
return (is_in_range(ch, str_to_ord('A'), str_to_ord('Z'))
or is_in_range(ch, str_to_ord('a'), str_to_ord('z'))
or is_in_range(ch, str_to_ord('0'), str_to_ord('9')))
def hex_dump(buffer, size): # pylint: disable=redefined-builtin
"""
Return a hex-formatted dump of the first 'size' bytes of 'buffer' (all of it if 'size' < 0).
"""
if size < 0:
size = len(buffer)
result = ""
for i in range(0, size):
if (i % 16) == 0:
result += "%06X: " % i
byte = buffer[i]
if type(byte) == str:
byte = ord(byte.decode('latin1'))
result += "%02X " % byte
if (i & 15) == 7:
result += " "
if ((i + 1) % 16) == 0 or (i + 1) == size:
j = i
while ((j + 1) % 16) != 0:
result += " "
if (j & 7) == 7:
result += " "
j += 1
result += " "
for j in range(i - (i % 16), i + 1):
byte = buffer[j]
if type(byte) == str:
byte = str_to_ord(byte.decode('latin1'))
k = '.'
if is_printable(byte):
k = chr(byte)
result += k
if (i + 1) != size:
result += "\n"
return result
def str_to_ord(a):
"""
Allows indexing into a string or an array of integers transparently.
Generic utility function.
"""
if type(a) == type(b'') or type(a) == type(u''):
a = ord(a)
return a
def compare_bytes(a, b, start, length):
for offset in range(start, start + length):
if str_to_ord(a[offset]) != str_to_ord(b[offset]):
return False
return True
def int_to_ip4_addr(a):
"""
Format an integer as a dotted-quad IPv4 address string (used when building DHCP requests).
"""
return "%u.%u.%u.%u" % ((a >> 24) & 0xFF,
(a >> 16) & 0xFF,
(a >> 8) & 0xFF,
(a) & 0xFF)
def hexstr_to_bytearray(a):
"""
Pack a hex string into a binary byte string.
"""
b = b""
for c in range(0, len(a) // 2):
b += struct.pack("B", int(a[c * 2:c * 2 + 2], 16))
return b
def set_ssh_config(config, name, val):
found = False
no_match = -1
match_start = no_match
for i in range(0, len(config)):
if config[i].startswith(name) and match_start == no_match:
config[i] = "{0} {1}".format(name, val)
found = True
elif config[i].lower().startswith("match"):
if config[i].lower().startswith("match all"):
# outside match block
match_start = no_match
elif match_start == no_match:
# inside match block
match_start = i
if not found:
if match_start != no_match:
i = match_start
config.insert(i, "{0} {1}".format(name, val))
return config
def set_ini_config(config, name, val):
notfound = True
nameEqual = name + '='
length = len(config)
text = "{0}=\"{1}\"".format(name, val)
for i in reversed(range(0, length)):
if config[i].startswith(nameEqual):
config[i] = text
notfound = False
break
if notfound:
config.insert(length - 1, text)
def replace_non_ascii(incoming, replace_char=''):
outgoing = ''
if incoming is not None:
for c in incoming:
if str_to_ord(c) > 128:
outgoing += replace_char
else:
outgoing += c
return outgoing
def remove_bom(c):
"""
The UTF-8 BOM is the three-byte sequence 0xEF, 0xBB, 0xBF; strip it if present.
"""
if not is_str_none_or_whitespace(c) and \
len(c) > 2 and \
str_to_ord(c[0]) > 128 and \
str_to_ord(c[1]) > 128 and \
str_to_ord(c[2]) > 128:
c = c[3:]
return c
def gen_password_hash(password, crypt_id, salt_len):
collection = string.ascii_letters + string.digits
salt = ''.join(random.choice(collection) for _ in range(salt_len))
salt = "${0}${1}".format(crypt_id, salt)
if sys.version_info[0] == 2:
# if python 2.*, encode to type 'str' to prevent Unicode Encode Error from crypt.crypt
password = password.encode('utf-8')
return crypt.crypt(password, salt)
def get_bytes_from_pem(pem_str):
base64_bytes = ""
for line in pem_str.split('\n'):
if "----" not in line:
base64_bytes += line
return base64_bytes
def compress(s):
"""
Compress a string, and return the base64 encoded result of the compression.
This method returns a string instead of a byte array. It is expected
that this method is called to compress smallish strings, not to compress
the contents of a file. The output of this method is suitable for
embedding in log statements.
"""
from azurelinuxagent.common.version import PY_VERSION_MAJOR
if PY_VERSION_MAJOR > 2:
return base64.b64encode(zlib.compress(bytes(s, 'utf-8'))).decode('utf-8')
return base64.b64encode(zlib.compress(s))
def b64encode(s):
from azurelinuxagent.common.version import PY_VERSION_MAJOR
if PY_VERSION_MAJOR > 2:
return base64.b64encode(bytes(s, 'utf-8')).decode('utf-8')
return base64.b64encode(s)
def b64decode(s):
from azurelinuxagent.common.version import PY_VERSION_MAJOR
if PY_VERSION_MAJOR > 2:
return base64.b64decode(s).decode('utf-8')
return base64.b64decode(s)
def safe_shlex_split(s):
import shlex
from azurelinuxagent.common.version import PY_VERSION
if PY_VERSION[:2] == (2, 6):
return shlex.split(s.encode('utf-8'))
return shlex.split(s)
def swap_hexstring(s, width=2):
r = len(s) % width
if r != 0:
s = ('0' * (width - (len(s) % width))) + s
return ''.join(reversed(
re.findall(
r'[a-f0-9]{{{0}}}'.format(width),
s,
re.IGNORECASE)))
def parse_json(json_str):
"""
Parse json string and return a resulting dictionary
"""
# trim null and whitespaces
result = None
if not is_str_empty(json_str):
import json
result = json.loads(json_str.rstrip(' \t\r\n\0'))
return result
def is_str_none_or_whitespace(s):
return s is None or len(s) == 0 or s.isspace()
def is_str_empty(s):
return is_str_none_or_whitespace(s) or is_str_none_or_whitespace(s.rstrip(' \t\r\n\0'))
def hash_strings(string_list):
"""
Compute a cryptographic hash of a list of strings
:param string_list: The strings to be hashed
:return: The cryptographic hash (digest) of the strings in the order provided
"""
sha1_hash = hashlib.sha1()
for item in string_list:
sha1_hash.update(item.encode())
return sha1_hash.digest()
def format_memory_value(unit, value):
units = {'bytes': 1, 'kilobytes': 1024, 'megabytes': 1024*1024, 'gigabytes': 1024*1024*1024}
if unit not in units:
raise ValueError("Unit must be one of {0}".format(units.keys()))
try:
value = float(value)
except TypeError:
raise TypeError('Value must be convertible to a float')
return int(value * units[unit])
def str_to_encoded_ustr(s, encoding='utf-8'):
"""
This function takes a string and converts it into the corresponding encoded ustr if it's not already a ustr.
The encoding is UTF-8 by default if not specified.
Note: ustr() is a unicode object for Py2 and a str object for Py3.
:param s: The string to convert to ustr
:param encoding: Encoding to use, UTF-8 by default
:return: Returns the corresponding ustr string. Returns None if input is None.
"""
# TODO: Import at the top of the file instead of a local import (using local import here to avoid cyclic dependency)
from azurelinuxagent.common.version import PY_VERSION_MAJOR
if s is None or type(s) is ustr:
# If it's already a ustr (or None), return it as is
return s
if PY_VERSION_MAJOR > 2:
try:
# For py3+, str() is unicode by default
if isinstance(s, bytes):
# str.encode() returns bytes which should be decoded to get the str.
return s.decode(encoding)
else:
# If it's not bytes, just return it as a ustr
return ustr(s)
except Exception:
# If decoding fails, fall back to returning the string as-is
return ustr(s)
# For Py2, explicitly convert the string to unicode with the specified encoding
return ustr(s, encoding=encoding)
def format_exception(exception):
# Function to format exception message
e = None
if sys.version_info[0] == 2:
_, e, tb = sys.exc_info()
else:
tb = exception.__traceback__
msg = ustr(exception) + "\n"
if tb is None or (sys.version_info[0] == 2 and e != exception):
msg += "[Traceback not available]"
else:
msg += ''.join(traceback.format_exception(type(exception), value=exception, tb=tb))
return msg
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/utils/timeutil.py 0000664 0000000 0000000 00000002274 14626177470 0025620 0 ustar 00root root 0000000 0000000 # Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache License.
import datetime
def create_timestamp(dt=None):
"""
Returns a string with the given datetime in ISO 8601 format. If no datetime is given as a parameter,
it uses datetime.utcnow().
"""
if dt is None:
dt = datetime.datetime.utcnow()
return dt.isoformat()
def create_history_timestamp(dt=None):
"""
Returns a string with the given datetime formatted as a timestamp for the agent's history folder
"""
if dt is None:
dt = datetime.datetime.utcnow()
return dt.strftime('%Y-%m-%dT%H-%M-%S')
def datetime_to_ticks(dt):
"""
Converts 'dt', a datetime, to the number of ticks (1 tick == 1/10000000 sec) since datetime.min (0001-01-01 00:00:00).
Note that the resolution of a datetime goes only to microseconds.
"""
return int(10 ** 7 * total_seconds(dt - datetime.datetime.min))
def total_seconds(dt):
"""
Computes the total seconds of timedelta 'dt'. Used instead of timedelta.total_seconds() because Python 2.6 does not implement it.
"""
return ((24.0 * 60 * 60 * dt.days + dt.seconds) * 10 ** 6 + dt.microseconds) / 10 ** 6
Azure-WALinuxAgent-2b21de5/azurelinuxagent/common/version.py 0000664 0000000 0000000 00000025271 14626177470 0024313 0 ustar 00root root 0000000 0000000 # Copyright 2019 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import re
import platform
import sys
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.future import ustr, get_linux_distribution
__DAEMON_VERSION_ENV_VARIABLE = '_AZURE_GUEST_AGENT_DAEMON_VERSION_'
"""
The daemon process sets this variable's value to the daemon's version number.
The variable is set only on versions >= 2.2.53
"""
def set_daemon_version(version):
"""
Sets the value of the _AZURE_GUEST_AGENT_DAEMON_VERSION_ environment variable.
The given 'version' can be a FlexibleVersion or a string that can be parsed into a FlexibleVersion
"""
flexible_version = version if isinstance(version, FlexibleVersion) else FlexibleVersion(version)
os.environ[__DAEMON_VERSION_ENV_VARIABLE] = ustr(flexible_version)
def get_daemon_version():
"""
Retrieves the value of the _AZURE_GUEST_AGENT_DAEMON_VERSION_ environment variable.
The value indicates the version of the daemon that started the current agent process or, if the current
process is the daemon, the version of the current process.
If the variable is not set (because the agent is < 2.2.53, or the process was not started by the daemon and
the process is not the daemon itself) the function returns "0.0.0.0"
"""
if __DAEMON_VERSION_ENV_VARIABLE in os.environ:
return FlexibleVersion(os.environ[__DAEMON_VERSION_ENV_VARIABLE])
return FlexibleVersion("0.0.0.0")
def get_f5_platform():
"""
Workaround for detecting F5 products: BIG-IP/IQ/etc. do not expose their
version info in the /etc/product-version location. Instead, the version
and product information is contained in the /VERSION file.
"""
result = [None, None, None, None]
f5_version = re.compile("^Version: (\d+\.\d+\.\d+)") # pylint: disable=W1401
f5_product = re.compile("^Product: ([\w-]+)") # pylint: disable=W1401
with open('/VERSION', 'r') as fh:
content = fh.readlines()
for line in content:
version_matches = f5_version.match(line)
product_matches = f5_product.match(line)
if version_matches:
result[1] = version_matches.group(1)
elif product_matches:
result[3] = product_matches.group(1)
if result[3] == "BIG-IP":
result[0] = "bigip"
result[2] = "bigip"
elif result[3] == "BIG-IQ":
result[0] = "bigiq"
result[2] = "bigiq"
elif result[3] == "iWorkflow":
result[0] = "iworkflow"
result[2] = "iworkflow"
return result
def get_checkpoint_platform():
take = build = release = ""
full_name = open("/etc/cp-release").read().strip()
with open("/etc/cloud-version") as f:
for line in f:
k, _, v = line.partition(": ")
v = v.strip()
if k == "release":
release = v
elif k == "take":
take = v
elif k == "build":
build = v
return ["gaia", take + "." + build, release, full_name]
def get_distro():
if 'FreeBSD' in platform.system():
release = re.sub('\-.*\Z', '', ustr(platform.release())) # pylint: disable=W1401
osinfo = ['freebsd', release, '', 'freebsd']
elif 'OpenBSD' in platform.system():
release = re.sub('\-.*\Z', '', ustr(platform.release())) # pylint: disable=W1401
osinfo = ['openbsd', release, '', 'openbsd']
elif 'Linux' in platform.system():
osinfo = get_linux_distribution(0, 'alpine')
elif 'NS-BSD' in platform.system():
release = re.sub('\-.*\Z', '', ustr(platform.release())) # pylint: disable=W1401
osinfo = ['nsbsd', release, '', 'nsbsd']
else:
try:
# dist() removed in Python 3.8
osinfo = list(platform.dist()) + [''] # pylint: disable=W1505,E1101
except Exception:
osinfo = ['UNKNOWN', 'FFFF', '', '']
# The platform.py lib has issues detecting the Oracle Linux distribution.
# Merge the following patch, provided by Oracle, as a temporary fix.
if os.path.exists("/etc/oracle-release"):
osinfo[2] = "oracle"
osinfo[3] = "Oracle Linux"
if os.path.exists("/etc/euleros-release"):
osinfo[0] = "euleros"
if os.path.exists("/etc/UnionTech-release"):
osinfo[0] = "uos"
if os.path.exists("/etc/mariner-release"):
osinfo[0] = "mariner"
# The platform.py lib has issues detecting the BIG-IP Linux distribution.
# Merge the following patch provided by F5.
if os.path.exists("/shared/vadc"):
osinfo = get_f5_platform()
if os.path.exists("/etc/cp-release"):
osinfo = get_checkpoint_platform()
if os.path.exists("/home/guestshell/azure"):
osinfo = ['iosxe', 'csr1000v', '', 'Cisco IOSXE Linux']
if os.path.exists("/etc/photon-release"):
osinfo[0] = "photonos"
# Strip surrounding quotes and whitespace from the distro name, then lowercase it
osinfo[0] = osinfo[0].strip('"').strip(' ').lower()
return osinfo
COMMAND_ABSENT = ustr("Absent")
COMMAND_FAILED = ustr("Failed")
def get_lis_version():
"""
This uses the Linux kernel's 'modinfo' command to retrieve the
"version" field for the "hv_vmbus" kernel module (the LIS
drivers). This is the documented method to retrieve the LIS module
version. Every Linux guest on Hyper-V will have this driver, but
it may not be installed as a module (it could instead be built
into the kernel). In that case, this will return "Absent" instead
of the version, indicating the driver version can be deduced from
the kernel version. It will only return "Failed" in the presence
of an exception.
This function is used to generate telemetry for the version of the
LIS drivers installed on the VM. The function and associated
telemetry can be removed after a few releases.
"""
try:
modinfo_output = shellutil.run_command(["modinfo", "-F", "version", "hv_vmbus"])
if modinfo_output:
return modinfo_output
# If the system doesn't have LIS drivers, 'modinfo' will
# return nothing on stdout, which will cause 'run_command'
# to return an empty string.
return COMMAND_ABSENT
except Exception:
# Ignore almost every possible exception because this is in a
# critical code path. Unfortunately the logger isn't already
# imported in this module or we'd log this too.
return COMMAND_FAILED
def has_logrotate():
try:
logrotate_version = shellutil.run_command(["logrotate", "--version"]).split("\n")[0]
return logrotate_version
except shellutil.CommandError:
# A non-zero return code means that logrotate isn't present on
# the system; --version shouldn't fail otherwise.
return COMMAND_ABSENT
except Exception:
return COMMAND_FAILED
AGENT_NAME = "WALinuxAgent"
AGENT_LONG_NAME = "Azure Linux Agent"
#
# IMPORTANT: Please be sure that the version is always 9.9.9.9 on the develop branch. Automation requires this, otherwise
# DCR may test the wrong agent version.
#
# When doing a release, be sure to use the actual agent version. Current agent version: 2.11.1.4
#
AGENT_VERSION = '2.11.1.4'
AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION)
AGENT_DESCRIPTION = """
The Azure Linux Agent supports the provisioning and running of Linux
VMs in the Azure cloud. This package should be installed on Linux disk
images that are built to run in the Azure environment.
"""
AGENT_DIR_GLOB = "{0}-*".format(AGENT_NAME)
AGENT_PKG_GLOB = "{0}-*.zip".format(AGENT_NAME)
AGENT_PATTERN = "{0}-(.*)".format(AGENT_NAME)
AGENT_NAME_PATTERN = re.compile(AGENT_PATTERN)
AGENT_PKG_PATTERN = re.compile(AGENT_PATTERN+"\.zip") # pylint: disable=W1401
AGENT_DIR_PATTERN = re.compile(".*/{0}".format(AGENT_PATTERN))
# The execution mode of the VM - IAAS or PAAS. Linux VMs are only executed in IAAS mode.
AGENT_EXECUTION_MODE = "IAAS"
EXT_HANDLER_PATTERN = b".*/WALinuxAgent-(\d+.\d+.\d+[.\d+]*).*-run-exthandlers" # pylint: disable=W1401
EXT_HANDLER_REGEX = re.compile(EXT_HANDLER_PATTERN)
__distro__ = get_distro()
DISTRO_NAME = __distro__[0]
DISTRO_VERSION = __distro__[1]
DISTRO_CODE_NAME = __distro__[2]
DISTRO_FULL_NAME = __distro__[3]
PY_VERSION = sys.version_info
PY_VERSION_MAJOR = sys.version_info[0]
PY_VERSION_MINOR = sys.version_info[1]
PY_VERSION_MICRO = sys.version_info[2]
# Set the CURRENT_AGENT and CURRENT_VERSION to match the agent directory name
# - This ensures the agent will "see itself" using the same name and version
# as the code that downloads agents.
def set_current_agent():
path = os.getcwd()
lib_dir = conf.get_lib_dir()
if lib_dir[-1] != os.path.sep:
lib_dir += os.path.sep
agent = path[len(lib_dir):].split(os.path.sep)[0]
match = AGENT_NAME_PATTERN.match(agent)
if match:
version = match.group(1)
else:
agent = AGENT_LONG_VERSION
version = AGENT_VERSION
return agent, FlexibleVersion(version)
def is_agent_package(path):
path = os.path.basename(path)
return not re.match(AGENT_PKG_PATTERN, path) is None
def is_agent_path(path):
path = os.path.basename(path)
return not re.match(AGENT_NAME_PATTERN, path) is None
CURRENT_AGENT, CURRENT_VERSION = set_current_agent()
def set_goal_state_agent():
agent = None
if os.path.isdir("/proc"):
pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]
else:
pids = []
for pid in pids:
try:
pname = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
match = EXT_HANDLER_REGEX.match(pname)
if match:
agent = match.group(1)
if PY_VERSION_MAJOR > 2:
agent = agent.decode('UTF-8')
break
except IOError:
continue
if agent is None:
agent = CURRENT_VERSION
return agent
GOAL_STATE_AGENT_VERSION = set_goal_state_agent()
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/ 0000775 0000000 0000000 00000000000 14626177470 0022220 5 ustar 00root root 0000000 0000000 Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/__init__.py 0000664 0000000 0000000 00000001261 14626177470 0024331 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.daemon.main import get_daemon_handler
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/main.py 0000664 0000000 0000000 00000016244 14626177470 0023525 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import sys
import time
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
from azurelinuxagent.common.event import add_event, WALAEventOperation, initialize_event_logger_vminfo_common_parameters
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.pa.rdma.rdma import setup_rdma_device
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.version import AGENT_NAME, AGENT_LONG_NAME, \
AGENT_VERSION, \
DISTRO_NAME, DISTRO_VERSION, PY_VERSION_MAJOR, PY_VERSION_MINOR, \
PY_VERSION_MICRO
from azurelinuxagent.daemon.resourcedisk import get_resourcedisk_handler
from azurelinuxagent.daemon.scvmm import get_scvmm_handler
from azurelinuxagent.ga.update import get_update_handler
from azurelinuxagent.pa.provision import get_provision_handler
from azurelinuxagent.pa.rdma import get_rdma_handler
OPENSSL_FIPS_ENVIRONMENT = "OPENSSL_FIPS"
def get_daemon_handler():
return DaemonHandler()
class DaemonHandler(object):
"""
Main thread of daemon. It will invoke other threads to do actual work
"""
def __init__(self):
self.running = True
self.osutil = get_osutil()
def run(self, child_args=None):
#
# The Container ID in telemetry events is retrieved from the goal state. We can fetch the goal state
# only after protocol detection, which is done during provisioning.
#
# Be aware that telemetry events emitted before that will not include the Container ID.
#
logger.info("{0} Version: {1}", AGENT_LONG_NAME, AGENT_VERSION)
logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION)
logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO)
self.check_pid()
self.initialize_environment()
# If FIPS is enabled, set the OpenSSL environment variable
# Note:
# -- Subprocesses inherit the current environment
if conf.get_fips_enabled():
os.environ[OPENSSL_FIPS_ENVIRONMENT] = '1'
while self.running:
try:
self.daemon(child_args)
except Exception as e: # pylint: disable=W0612
err_msg = textutil.format_exception(e)
add_event(name=AGENT_NAME, is_success=False, message=ustr(err_msg),
op=WALAEventOperation.UnhandledError)
logger.warn("Daemon ended with exception -- Sleep 15 seconds and restart daemon")
time.sleep(15)
def check_pid(self):
"""Check whether daemon is already running"""
pid = None
pid_file = conf.get_agent_pid_file_path()
if os.path.isfile(pid_file):
pid = fileutil.read_file(pid_file)
if self.osutil.check_pid_alive(pid):
logger.info("Daemon is already running: {0}", pid)
sys.exit(0)
fileutil.write_file(pid_file, ustr(os.getpid()))
def sleep_if_disabled(self):
agent_disabled_file_path = conf.get_disable_agent_file_path()
if os.path.exists(agent_disabled_file_path):
import threading
logger.warn("Disabling the guest agent by sleeping forever; to re-enable, remove {0} and restart".format(agent_disabled_file_path))
logger.warn("To enable VM extensions, also ensure that the VM's osProfile.allowExtensionOperations property is set to true.")
self.running = False
disable_event = threading.Event()
disable_event.wait()
def initialize_environment(self):
# Create lib dir
if not os.path.isdir(conf.get_lib_dir()):
fileutil.mkdir(conf.get_lib_dir(), mode=0o700)
os.chdir(conf.get_lib_dir())
def _initialize_telemetry(self):
protocol = self.protocol_util.get_protocol()
initialize_event_logger_vminfo_common_parameters(protocol)
def daemon(self, child_args=None):
logger.info("Run daemon")
self.protocol_util = get_protocol_util() # pylint: disable=W0201
self.scvmm_handler = get_scvmm_handler() # pylint: disable=W0201
self.resourcedisk_handler = get_resourcedisk_handler() # pylint: disable=W0201
self.rdma_handler = get_rdma_handler() # pylint: disable=W0201
self.provision_handler = get_provision_handler() # pylint: disable=W0201
self.update_handler = get_update_handler() # pylint: disable=W0201
if conf.get_detect_scvmm_env():
self.scvmm_handler.run()
if conf.get_resourcedisk_format():
self.resourcedisk_handler.run()
# Always re-detect the protocol on start (e.g., wireserver vs.
# on-premise) since a VHD can move between environments
self.protocol_util.clear_protocol()
self.provision_handler.run()
# Once we have the protocol, complete initialization of the telemetry fields
# that require the goal state and IMDS
self._initialize_telemetry()
# Enable RDMA, continue on errors
if conf.enable_rdma():
nd_version = self.rdma_handler.get_rdma_version()
self.rdma_handler.install_driver_if_needed()
logger.info("RDMA capabilities are enabled in configuration")
try:
# Ensure the most recent SharedConfig is available
# - Changes to RDMA state may not increment the goal state
# incarnation number. A forced update ensures the most
# current values.
protocol = self.protocol_util.get_protocol()
goal_state = GoalState(protocol, goal_state_properties=GoalStateProperties.SharedConfig)
setup_rdma_device(nd_version, goal_state.shared_conf)
except Exception as e:
logger.error("Error setting up rdma device: %s" % e)
else:
logger.info("RDMA capabilities are not enabled, skipping")
self.sleep_if_disabled()
# Disable output to /dev/console once provisioning has completed
if logger.console_output_enabled():
logger.info("End of log to /dev/console. The agent will now check for updates and then will process extensions.")
logger.disable_console_output()
while self.running:
self.update_handler.run_latest(child_args=child_args)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/resourcedisk/ 0000775 0000000 0000000 00000000000 14626177470 0024722 5 ustar 00root root 0000000 0000000 Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/resourcedisk/__init__.py 0000664 0000000 0000000 00000001347 14626177470 0027040 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.daemon.resourcedisk.factory import get_resourcedisk_handler
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/resourcedisk/default.py 0000664 0000000 0000000 00000035413 14626177470 0026726 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import re
import stat
import sys
import threading
from time import sleep
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.future import ustr
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common.event import add_event, WALAEventOperation
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.exception import ResourceDiskError
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.version import AGENT_NAME
DATALOSS_WARNING_FILE_NAME = "DATALOSS_WARNING_README.txt"
DATA_LOSS_WARNING = """\
WARNING: THIS IS A TEMPORARY DISK.
Any data stored on this drive is SUBJECT TO LOSS and THERE IS NO WAY TO RECOVER IT.
Please do not use this disk for storing any personal or application data.
For additional details, please refer to the MSDN documentation at:
http://msdn.microsoft.com/en-us/library/windowsazure/jj672979.aspx
"""
class ResourceDiskHandler(object):
def __init__(self):
self.osutil = get_osutil()
self.fs = conf.get_resourcedisk_filesystem()
def start_activate_resource_disk(self):
disk_thread = threading.Thread(target=self.run)
disk_thread.start()
def run(self):
mount_point = None
if conf.get_resourcedisk_format():
mount_point = self.activate_resource_disk()
if mount_point is not None and \
conf.get_resourcedisk_enable_swap():
self.enable_swap(mount_point)
def activate_resource_disk(self):
logger.info("Activate resource disk")
try:
mount_point = conf.get_resourcedisk_mountpoint()
mount_point = self.mount_resource_disk(mount_point)
warning_file = os.path.join(mount_point,
DATALOSS_WARNING_FILE_NAME)
try:
fileutil.write_file(warning_file, DATA_LOSS_WARNING)
except IOError as e:
logger.warn("Failed to write data loss warning:{0}", e)
return mount_point
except ResourceDiskError as e:
logger.error("Failed to mount resource disk {0}", e)
add_event(name=AGENT_NAME, is_success=False, message=ustr(e),
op=WALAEventOperation.ActivateResourceDisk)
return None
def enable_swap(self, mount_point):
logger.info("Enable swap")
try:
size_mb = conf.get_resourcedisk_swap_size_mb()
self.create_swap_space(mount_point, size_mb)
except ResourceDiskError as e:
logger.error("Failed to enable swap {0}", e)
def reread_partition_table(self, device):
if shellutil.run("sfdisk -R {0}".format(device), chk_err=False):
shellutil.run("blockdev --rereadpt {0}".format(device),
chk_err=False)
def mount_resource_disk(self, mount_point):
device = self.osutil.device_for_ide_port(1)
if device is None:
raise ResourceDiskError("unable to detect disk topology")
device = "/dev/{0}".format(device)
partition = device + "1"
mount_list = shellutil.run_get_output("mount")[1]
existing = self.osutil.get_mount_point(mount_list, device)
if existing:
logger.info("Resource disk [{0}] is already mounted [{1}]",
partition,
existing)
return existing
try:
fileutil.mkdir(mount_point, mode=0o755)
except OSError as ose:
msg = "Failed to create mount point " \
"directory [{0}]: {1}".format(mount_point, ose)
logger.error(msg)
raise ResourceDiskError(msg=msg, inner=ose)
logger.info("Examining partition table")
ret = shellutil.run_get_output("parted {0} print".format(device))
if ret[0]:
raise ResourceDiskError("Could not determine partition info for "
"{0}: {1}".format(device, ret[1]))
force_option = 'F'
if self.fs == 'xfs':
force_option = 'f'
mkfs_string = "mkfs.{0} -{2} {1}".format(
self.fs, partition, force_option)
if "gpt" in ret[1]:
logger.info("GPT detected, finding partitions")
parts = [x for x in ret[1].split("\n") if
re.match(r"^\s*[0-9]+", x)]
logger.info("Found {0} GPT partition(s).", len(parts))
if len(parts) > 1:
logger.info("Removing old GPT partitions")
for i in range(1, len(parts) + 1):
logger.info("Remove partition {0}", i)
shellutil.run("parted {0} rm {1}".format(device, i))
logger.info("Creating new GPT partition")
shellutil.run(
"parted {0} mkpart primary 0% 100%".format(device))
logger.info("Format partition [{0}]", mkfs_string)
shellutil.run(mkfs_string)
else:
logger.info("GPT not detected, determining filesystem")
ret = self.change_partition_type(
suppress_message=True,
option_str="{0} 1 -n".format(device))
ptype = ret[1].strip()
if ptype == "7" and self.fs != "ntfs":
logger.info("The partition is formatted with ntfs, updating "
"partition type to 83")
self.change_partition_type(
suppress_message=False,
option_str="{0} 1 83".format(device))
self.reread_partition_table(device)
logger.info("Format partition [{0}]", mkfs_string)
shellutil.run(mkfs_string)
else:
logger.info("The partition type is {0}", ptype)
mount_options = conf.get_resourcedisk_mountoptions()
mount_string = self.get_mount_string(mount_options,
partition,
mount_point)
attempts = 5
while not os.path.exists(partition) and attempts > 0:
logger.info("Waiting for partition [{0}], {1} attempts remaining",
partition,
attempts)
sleep(5)
attempts -= 1
if not os.path.exists(partition):
raise ResourceDiskError(
"Partition was not created [{0}]".format(partition))
logger.info("Mount resource disk [{0}]", mount_string)
ret, output = shellutil.run_get_output(mount_string, chk_err=False)
# if the exit code is 32, the resource disk may already be mounted
if ret == 32 and output.find("is already mounted") != -1:
logger.warn("Could not mount resource disk: {0}", output)
elif ret != 0:
# Some kernels seem to issue an async partition re-read after a
# 'parted' command invocation. This causes mount to fail if the
# partition re-read is not complete by the time mount is
# attempted. Seen in CentOS 7.2. Force a sequential re-read of
# the partition and try mounting.
logger.warn("Failed to mount resource disk. "
"Retry mounting after re-reading partition info.")
self.reread_partition_table(device)
ret, output = shellutil.run_get_output(mount_string, chk_err=False)
if ret:
logger.warn("Failed to mount resource disk. "
"Attempting to format and retry mount. [{0}]",
output)
shellutil.run(mkfs_string)
ret, output = shellutil.run_get_output(mount_string)
if ret:
raise ResourceDiskError("Could not mount {0} "
"after syncing partition table: "
"[{1}] {2}".format(partition,
ret,
output))
logger.info("Resource disk {0} is mounted at {1} with {2}",
device,
mount_point,
self.fs)
return mount_point
def change_partition_type(self, suppress_message, option_str):
"""
Use sfdisk to change the partition type.
First try with --part-type; if that fails, fall back to -c
"""
option_to_use = '--part-type'
command = "sfdisk {0} {1} {2}".format(
option_to_use, '-f' if suppress_message else '', option_str)
err_code, output = shellutil.run_get_output(
command, chk_err=False, log_cmd=True)
# fall back to -c
if err_code != 0:
logger.info(
"sfdisk with --part-type failed [{0}], retrying with -c",
err_code)
option_to_use = '-c'
command = "sfdisk {0} {1} {2}".format(
option_to_use, '-f' if suppress_message else '', option_str)
err_code, output = shellutil.run_get_output(command, log_cmd=True)
if err_code == 0:
logger.info('{0} succeeded',
command)
else:
logger.error('{0} failed [{1}: {2}]',
command,
err_code,
output)
return err_code, output
def get_mount_string(self, mount_options, partition, mount_point):
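        # Illustrative (hypothetical values): with fs='ext4', options 'noatime' and
        # partition '/dev/sdb1', this returns "mount -t ext4 -o noatime /dev/sdb1 /mnt/resource"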
if mount_options is not None:
return 'mount -t {0} -o {1} {2} {3}'.format(
self.fs,
mount_options,
partition,
mount_point
)
else:
return 'mount -t {0} {1} {2}'.format(
self.fs,
partition,
mount_point
)
@staticmethod
def check_existing_swap_file(swapfile, swaplist, size):
if swapfile in swaplist and os.path.isfile(
swapfile) and os.path.getsize(swapfile) == size:
logger.info("Swap already enabled")
# restrict access to owner (remove all access from group, others)
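            # Illustrative: a swapfile created with mode 0o644 has its group/other
            # bits masked off here, leaving 0o600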
swapfile_mode = os.stat(swapfile).st_mode
if swapfile_mode & (stat.S_IRWXG | stat.S_IRWXO):
swapfile_mode = swapfile_mode & ~(stat.S_IRWXG | stat.S_IRWXO)
logger.info(
"Changing mode of {0} to {1:o}".format(
swapfile, swapfile_mode))
os.chmod(swapfile, swapfile_mode)
return True
return False
def create_swap_space(self, mount_point, size_mb):
size_kb = size_mb * 1024
size = size_kb * 1024
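        # Illustrative arithmetic: size_mb=2048 -> size_kb=2097152 -> size=2147483648 bytes (2 GiB)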
swapfile = os.path.join(mount_point, 'swapfile')
swaplist = shellutil.run_get_output("swapon -s")[1]
if self.check_existing_swap_file(swapfile, swaplist, size):
return
if os.path.isfile(swapfile) and os.path.getsize(swapfile) != size:
logger.info("Remove old swap file")
shellutil.run("swapoff {0}".format(swapfile), chk_err=False)
os.remove(swapfile)
if not os.path.isfile(swapfile):
logger.info("Create swap file")
self.mkfile(swapfile, size_kb * 1024)
shellutil.run("mkswap {0}".format(swapfile))
if shellutil.run("swapon {0}".format(swapfile)):
raise ResourceDiskError("{0}".format(swapfile))
logger.info("Enabled {0}KB of swap at {1}".format(size_kb, swapfile))
def mkfile(self, filename, nbytes):
"""
        Create a non-sparse file of that size. Deletes and replaces an existing
        file.
        To allow efficient execution, fallocate will be tried first. This includes
        ``os.posix_fallocate`` on Python 3.3+ (unix) and the ``fallocate`` command
        in the popular ``util-linux{,-ng}`` package.
        A dd fallback will be tried too. When size < 64M, perform a single-pass
        dd; otherwise do a two-pass dd.
"""
if not isinstance(nbytes, int):
nbytes = int(nbytes)
if nbytes <= 0:
raise ResourceDiskError("Invalid swap size [{0}]".format(nbytes))
if os.path.isfile(filename):
os.remove(filename)
        # If the file system is xfs or ext4, use dd right away, as it has been
        # reported that enabling swap fails on xfs when the disk space is
        # allocated with fallocate
ret = 0
fn_sh = shellutil.quote((filename,))
if self.fs not in ['xfs', 'ext4']:
# os.posix_fallocate
if sys.version_info >= (3, 3):
# Probable errors:
# - OSError: Seen on Cygwin, libc notimpl?
# - AttributeError: What if someone runs this under...
fd = None
try:
fd = os.open(
filename,
os.O_CREAT | os.O_WRONLY | os.O_EXCL,
stat.S_IRUSR | stat.S_IWUSR)
os.posix_fallocate(fd, 0, nbytes) # pylint: disable=no-member
return 0
except BaseException:
# Not confident with this thing, just keep trying...
pass
finally:
if fd is not None:
os.close(fd)
# fallocate command
ret = shellutil.run(
u"umask 0077 && fallocate -l {0} {1}".format(nbytes, fn_sh))
if ret == 0:
return ret
logger.info("fallocate unsuccessful, falling back to dd")
# dd fallback
dd_maxbs = 64 * 1024 ** 2
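        # Illustrative arithmetic: for a 100 MiB file, blocks = 1 (one 64 MiB pass)
        # and the remaining 36 MiB is written in a second, single-block pass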
dd_cmd = "umask 0077 && dd if=/dev/zero bs={0} count={1} " \
"conv=notrunc of={2}"
blocks = int(nbytes / dd_maxbs)
if blocks > 0:
ret = shellutil.run(dd_cmd.format(dd_maxbs, blocks, fn_sh)) << 8
remains = int(nbytes % dd_maxbs)
if remains > 0:
ret += shellutil.run(dd_cmd.format(remains, 1, fn_sh))
if ret == 0:
logger.info("dd successful")
else:
logger.error("dd unsuccessful")
return ret
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/resourcedisk/factory.py 0000664 0000000 0000000 00000002575 14626177470 0026754 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, DISTRO_FULL_NAME
from .default import ResourceDiskHandler
from .freebsd import FreeBSDResourceDiskHandler
from .openbsd import OpenBSDResourceDiskHandler
from .openwrt import OpenWRTResourceDiskHandler
def get_resourcedisk_handler(distro_name=DISTRO_NAME,
distro_version=DISTRO_VERSION, # pylint: disable=W0613
distro_full_name=DISTRO_FULL_NAME): # pylint: disable=W0613
if distro_name == "freebsd":
return FreeBSDResourceDiskHandler()
if distro_name == "openbsd":
return OpenBSDResourceDiskHandler()
if distro_name == "openwrt":
return OpenWRTResourceDiskHandler()
return ResourceDiskHandler()
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/resourcedisk/freebsd.py 0000664 0000000 0000000 00000016250 14626177470 0026712 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common.exception import ResourceDiskError
from azurelinuxagent.daemon.resourcedisk.default import ResourceDiskHandler
class FreeBSDResourceDiskHandler(ResourceDiskHandler):
"""
This class handles resource disk mounting for FreeBSD.
    The resource disk is located at the following slot:
scbus2 on blkvsc1 bus 0:
at scbus2 target 1 lun 0 (da1,pass2)
There are 2 variations based on partition table type:
1. MBR: The resource disk partition is /dev/da1s1
2. GPT: The resource disk partition is /dev/da1p2, /dev/da1p1 is for reserved usage.
"""
def __init__(self): # pylint: disable=W0235
super(FreeBSDResourceDiskHandler, self).__init__()
@staticmethod
def parse_gpart_list(data):
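        # Parses 'gpart list' output lines such as (illustrative):
        #   Geom name: da1
        #   scheme: GPT
        # into a dict mapping geom name to partition scheme, e.g. {'da1': 'GPT'}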
dic = {}
for line in data.split('\n'):
if line.find("Geom name: ") != -1:
geom_name = line[11:]
elif line.find("scheme: ") != -1:
dic[geom_name] = line[8:]
return dic
def mount_resource_disk(self, mount_point):
fs = self.fs
if fs != 'ufs':
raise ResourceDiskError(
"Unsupported filesystem type:{0}, only ufs is supported.".format(fs))
# 1. Detect device
err, output = shellutil.run_get_output('gpart list')
if err:
raise ResourceDiskError(
"Unable to detect resource disk device:{0}".format(output))
disks = self.parse_gpart_list(output)
device = self.osutil.device_for_ide_port(1)
if device is None or device not in disks:
# fallback logic to find device
err, output = shellutil.run_get_output(
'camcontrol periphlist 2:1:0')
if err:
# try again on "3:1:0"
err, output = shellutil.run_get_output(
'camcontrol periphlist 3:1:0')
if err:
raise ResourceDiskError(
"Unable to detect resource disk device:{0}".format(output))
# 'da1: generation: 4 index: 1 status: MORE\npass2: generation: 4 index: 2 status: LAST\n'
for line in output.split('\n'):
index = line.find(':')
if index > 0:
geom_name = line[:index]
if geom_name in disks:
device = geom_name
break
if not device:
raise ResourceDiskError("Unable to detect resource disk device.")
logger.info('Resource disk device {0} found.', device)
# 2. Detect partition
partition_table_type = disks[device]
if partition_table_type == 'MBR':
provider_name = device + 's1'
elif partition_table_type == 'GPT':
provider_name = device + 'p2'
else:
raise ResourceDiskError(
"Unsupported partition table type:{0}".format(output))
err, output = shellutil.run_get_output(
'gpart show -p {0}'.format(device))
if err or output.find(provider_name) == -1:
raise ResourceDiskError("Resource disk partition not found.")
partition = '/dev/' + provider_name
logger.info('Resource disk partition {0} found.', partition)
# 3. Mount partition
mount_list = shellutil.run_get_output("mount")[1]
existing = self.osutil.get_mount_point(mount_list, partition)
if existing:
logger.info("Resource disk {0} is already mounted", partition)
return existing
fileutil.mkdir(mount_point, mode=0o755)
mount_cmd = 'mount -t {0} {1} {2}'.format(fs, partition, mount_point)
err = shellutil.run(mount_cmd, chk_err=False)
if err:
logger.info(
'Creating {0} filesystem on partition {1}'.format(
fs, partition))
err, output = shellutil.run_get_output(
'newfs -U {0}'.format(partition))
if err:
raise ResourceDiskError(
"Failed to create new filesystem on partition {0}, error:{1}" .format(
partition, output))
err, output = shellutil.run_get_output(mount_cmd, chk_err=False)
if err:
raise ResourceDiskError(
"Failed to mount partition {0}, error {1}".format(
partition, output))
logger.info(
"Resource disk partition {0} is mounted at {1} with fstype {2}",
partition,
mount_point,
fs)
return mount_point
def create_swap_space(self, mount_point, size_mb):
size_kb = size_mb * 1024
size = size_kb * 1024
swapfile = os.path.join(mount_point, 'swapfile')
swaplist = shellutil.run_get_output("swapctl -l")[1]
if self.check_existing_swap_file(swapfile, swaplist, size):
return
if os.path.isfile(swapfile) and os.path.getsize(swapfile) != size:
logger.info("Remove old swap file")
shellutil.run("swapoff {0}".format(swapfile), chk_err=False)
os.remove(swapfile)
if not os.path.isfile(swapfile):
logger.info("Create swap file")
self.mkfile(swapfile, size_kb * 1024)
mddevice = shellutil.run_get_output(
"mdconfig -a -t vnode -f {0}".format(swapfile))[1].rstrip()
shellutil.run("chmod 0600 /dev/{0}".format(mddevice))
if conf.get_resourcedisk_enable_swap_encryption():
shellutil.run("kldload aesni")
shellutil.run("kldload cryptodev")
shellutil.run("kldload geom_eli")
shellutil.run(
"geli onetime -e AES-XTS -l 256 -d /dev/{0}".format(mddevice))
shellutil.run("chmod 0600 /dev/{0}.eli".format(mddevice))
if shellutil.run("swapon /dev/{0}.eli".format(mddevice)):
raise ResourceDiskError("/dev/{0}.eli".format(mddevice))
logger.info(
"Enabled {0}KB of swap at /dev/{1}.eli ({2})".format(size_kb, mddevice, swapfile))
else:
if shellutil.run("swapon /dev/{0}".format(mddevice)):
raise ResourceDiskError("/dev/{0}".format(mddevice))
logger.info(
"Enabled {0}KB of swap at /dev/{1} ({2})".format(size_kb, mddevice, swapfile))
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/resourcedisk/openbsd.py 0000664 0000000 0000000 00000011443 14626177470 0026731 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
# Copyright 2017 Reyk Floeter
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and OpenSSL 1.0+
#
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common.exception import ResourceDiskError
from azurelinuxagent.daemon.resourcedisk.default import ResourceDiskHandler
class OpenBSDResourceDiskHandler(ResourceDiskHandler):
def __init__(self):
super(OpenBSDResourceDiskHandler, self).__init__()
        # Fast File System (FFS) is UFS
if self.fs == 'ufs' or self.fs == 'ufs2':
self.fs = 'ffs'
def create_swap_space(self, mount_point, size_mb):
pass
def enable_swap(self, mount_point):
size_mb = conf.get_resourcedisk_swap_size_mb()
if size_mb:
logger.info("Enable swap")
device = self.osutil.device_for_ide_port(1)
err, output = shellutil.run_get_output("swapctl -a /dev/"
"{0}b".format(device),
chk_err=False)
if err:
logger.error("Failed to enable swap, error {0}", output)
def mount_resource_disk(self, mount_point):
fs = self.fs
if fs != 'ffs':
raise ResourceDiskError("Unsupported filesystem type: {0}, only "
"ufs/ffs is supported.".format(fs))
# 1. Get device
device = self.osutil.device_for_ide_port(1)
if not device:
raise ResourceDiskError("Unable to detect resource disk device.")
logger.info('Resource disk device {0} found.', device)
# 2. Get partition
partition = "/dev/{0}a".format(device)
# 3. Mount partition
mount_list = shellutil.run_get_output("mount")[1]
existing = self.osutil.get_mount_point(mount_list, partition)
if existing:
logger.info("Resource disk {0} is already mounted", partition)
return existing
fileutil.mkdir(mount_point, mode=0o755)
mount_cmd = 'mount -t {0} {1} {2}'.format(self.fs,
partition, mount_point)
err = shellutil.run(mount_cmd, chk_err=False)
if err:
logger.info('Creating {0} filesystem on {1}'.format(fs, device))
fdisk_cmd = "/sbin/fdisk -yi {0}".format(device)
err, output = shellutil.run_get_output(fdisk_cmd, chk_err=False)
if err:
raise ResourceDiskError("Failed to create new MBR on {0}, "
"error: {1}".format(device, output))
size_mb = conf.get_resourcedisk_swap_size_mb()
if size_mb:
if size_mb > 512 * 1024:
size_mb = 512 * 1024
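                # Illustrative: with a hypothetical mount point '/mnt/resource' and a
                # 2048 MB swap size, the template piped to disklabel below would be:
                #   /mnt/resource 1G-* 50%
                #   swap 1-2048M 50%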
disklabel_cmd = ("echo -e '{0} 1G-* 50%\nswap 1-{1}M 50%' "
"| disklabel -w -A -T /dev/stdin "
"{2}").format(mount_point, size_mb, device)
ret, output = shellutil.run_get_output(
disklabel_cmd, chk_err=False)
if ret:
raise ResourceDiskError("Failed to create new disklabel "
"on {0}, error "
"{1}".format(device, output))
err, output = shellutil.run_get_output("newfs -O2 {0}a"
"".format(device))
if err:
raise ResourceDiskError("Failed to create new filesystem on "
"partition {0}, error "
"{1}".format(partition, output))
err, output = shellutil.run_get_output(mount_cmd, chk_err=False)
if err:
raise ResourceDiskError("Failed to mount partition {0}, "
"error {1}".format(partition, output))
logger.info("Resource disk partition {0} is mounted at {1} with fstype "
"{2}", partition, mount_point, fs)
return mount_point
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/resourcedisk/openwrt.py 0000664 0000000 0000000 00000013337 14626177470 0027001 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
# Copyright 2018 Sonus Networks, Inc. (d.b.a. Ribbon Communications Operating Company)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
from time import sleep
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common.exception import ResourceDiskError
from azurelinuxagent.daemon.resourcedisk.default import ResourceDiskHandler
class OpenWRTResourceDiskHandler(ResourceDiskHandler):
def __init__(self):
super(OpenWRTResourceDiskHandler, self).__init__()
        # Fast File System (FFS) is UFS
if self.fs == 'ufs' or self.fs == 'ufs2':
self.fs = 'ffs'
def reread_partition_table(self, device):
ret, output = shellutil.run_get_output("hdparm -z {0}".format(device), chk_err=False) # pylint: disable=W0612
if ret != 0:
logger.warn("Failed refresh the partition table.")
def mount_resource_disk(self, mount_point):
device = self.osutil.device_for_ide_port(1)
if device is None:
raise ResourceDiskError("unable to detect disk topology")
logger.info('Resource disk device {0} found.', device)
# 2. Get partition
device = "/dev/{0}".format(device)
partition = device + "1"
logger.info('Resource disk partition {0} found.', partition)
# 3. Mount partition
mount_list = shellutil.run_get_output("mount")[1]
existing = self.osutil.get_mount_point(mount_list, device)
if existing:
logger.info("Resource disk [{0}] is already mounted [{1}]",
partition,
existing)
return existing
try:
fileutil.mkdir(mount_point, mode=0o755)
except OSError as ose:
msg = "Failed to create mount point " \
"directory [{0}]: {1}".format(mount_point, ose)
logger.error(msg)
raise ResourceDiskError(msg=msg, inner=ose)
force_option = 'F'
if self.fs == 'xfs':
force_option = 'f'
mkfs_string = "mkfs.{0} -{2} {1}".format(self.fs, partition, force_option)
        # Compared to the default mount_resource_disk, we don't check for GPT, which is not supported on OpenWRT
ret = self.change_partition_type(suppress_message=True, option_str="{0} 1 -n".format(device))
ptype = ret[1].strip()
if ptype == "7" and self.fs != "ntfs":
logger.info("The partition is formatted with ntfs, updating "
"partition type to 83")
self.change_partition_type(suppress_message=False, option_str="{0} 1 83".format(device))
self.reread_partition_table(device)
logger.info("Format partition [{0}]", mkfs_string)
shellutil.run(mkfs_string)
else:
logger.info("The partition type is {0}", ptype)
mount_options = conf.get_resourcedisk_mountoptions()
mount_string = self.get_mount_string(mount_options,
partition,
mount_point)
attempts = 5
while not os.path.exists(partition) and attempts > 0:
logger.info("Waiting for partition [{0}], {1} attempts remaining",
partition,
attempts)
sleep(5)
attempts -= 1
if not os.path.exists(partition):
raise ResourceDiskError("Partition was not created [{0}]".format(partition))
if os.path.ismount(mount_point):
logger.warn("Disk is already mounted on {0}", mount_point)
else:
# Some kernels seem to issue an async partition re-read after a
# command invocation. This causes mount to fail if the
# partition re-read is not complete by the time mount is
# attempted. Seen in CentOS 7.2. Force a sequential re-read of
# the partition and try mounting.
logger.info("Mounting after re-reading partition info.")
self.reread_partition_table(device)
logger.info("Mount resource disk [{0}]", mount_string)
ret, output = shellutil.run_get_output(mount_string)
if ret:
logger.warn("Failed to mount resource disk. "
"Attempting to format and retry mount. [{0}]",
output)
shellutil.run(mkfs_string)
ret, output = shellutil.run_get_output(mount_string)
if ret:
raise ResourceDiskError("Could not mount {0} "
"after syncing partition table: "
"[{1}] {2}".format(partition,
ret,
output))
logger.info("Resource disk {0} is mounted at {1} with {2}",
device,
mount_point,
self.fs)
return mount_point
Azure-WALinuxAgent-2b21de5/azurelinuxagent/daemon/scvmm.py 0000664 0000000 0000000 00000005327 14626177470 0023726 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import re
import os
import sys
import subprocess
import time
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common.osutil import get_osutil
VMM_CONF_FILE_NAME = "linuxosconfiguration.xml"
VMM_STARTUP_SCRIPT_NAME = "install"
def get_scvmm_handler():
return ScvmmHandler()
class ScvmmHandler(object):
def __init__(self):
self.osutil = get_osutil()
def detect_scvmm_env(self, dev_dir='/dev'):
logger.info("Detecting Microsoft System Center VMM Environment")
found=False
# try to load the ATAPI driver, continue on failure
self.osutil.try_load_atapiix_mod()
# cycle through all available /dev/sr*|hd*|cdrom*|cd* looking for the scvmm configuration file
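        # e.g. (illustrative) device names such as 'sr0', 'hdc', 'cdrom0' or 'cd0' match the pattern below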
mount_point = conf.get_dvd_mount_point()
for devices in filter(lambda x: x is not None, [re.match(r'(sr[0-9]|hd[c-z]|cdrom[0-9]?|cd[0-9]+)', dev) for dev in os.listdir(dev_dir)]):
dvd_device = os.path.join(dev_dir, devices.group(0))
self.osutil.mount_dvd(max_retry=1, chk_err=False, dvd_device=dvd_device, mount_point=mount_point)
found = os.path.isfile(os.path.join(mount_point, VMM_CONF_FILE_NAME))
if found:
self.start_scvmm_agent(mount_point=mount_point)
break
else:
self.osutil.umount_dvd(chk_err=False, mount_point=mount_point)
return found
def start_scvmm_agent(self, mount_point=None):
logger.info("Starting Microsoft System Center VMM Initialization "
"Process")
if mount_point is None:
mount_point = conf.get_dvd_mount_point()
startup_script = os.path.join(mount_point, VMM_STARTUP_SCRIPT_NAME)
with open(os.devnull, 'w') as devnull:
subprocess.Popen(["/bin/bash", startup_script, "-p " + mount_point],
stdout=devnull, stderr=devnull)
def run(self):
if self.detect_scvmm_env():
logger.info("Exiting")
time.sleep(300)
sys.exit(0)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/ 0000775 0000000 0000000 00000000000 14626177470 0021344 5 ustar 00root root 0000000 0000000 Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/__init__.py 0000664 0000000 0000000 00000001166 14626177470 0023461 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/agent_update_handler.py 0000664 0000000 0000000 00000030234 14626177470 0026055 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import os
from azurelinuxagent.common import conf, logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError, AgentFamilyMissingError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus, VERSION_0
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import get_daemon_version
from azurelinuxagent.ga.rsm_version_updater import RSMVersionUpdater
from azurelinuxagent.ga.self_update_version_updater import SelfUpdateVersionUpdater
def get_agent_update_handler(protocol):
return AgentUpdateHandler(protocol)
class AgentUpdateHandler(object):
"""
    This class handles two types of agent update. The handler initializes the updater to SelfUpdateVersionUpdater and switches to the appropriate updater based on the conditions below:
        RSM update: This is an update requested by RSM. The contract between CRP and the agent is that we get the following properties in the goal state:
                    version: the version to update to
                    isVersionFromRSM: True if the version is from an RSM deployment.
                    isVMEnabledForRSMUpgrades: True if the VM is enabled for RSM upgrades.
                    If the VM is enabled for RSM upgrades, we use the RSM update path; but if the requested update is not from an RSM deployment,
                    we ignore the update.
        Self update: We fall back to this if the above conditions are not met. This updates to the largest version available in the manifest.
                    Note: Self-update does not support downgrades.
    On every new goal state, the handler records whether the last update was done with RSM. Once the handler decides which updater to use, it
    performs the following steps:
        1. Retrieve the agent version from the goal state.
        2. Check whether we are allowed to update to that version.
3. Log the update message.
4. Purge the extra agents from disk.
5. Download the new agent.
6. Proceed with update.
[Note: 1.0.8.147 is the minimum supported version of HGPA which will have the isVersionFromRSM and isVMEnabledForRSMUpgrades properties in vmsettings.]
"""
def __init__(self, protocol):
self._protocol = protocol
self._gs_id = "unknown"
self._ga_family_type = conf.get_autoupdate_gafamily()
self._daemon_version = self._get_daemon_version_for_update()
self._last_attempted_update_error_msg = ""
# restore the state of rsm update. Default to self-update if last update is not with RSM.
if not self._get_is_last_update_with_rsm():
self._updater = SelfUpdateVersionUpdater(self._gs_id)
else:
self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version)
@staticmethod
def _get_daemon_version_for_update():
daemon_version = get_daemon_version()
if daemon_version != FlexibleVersion(VERSION_0):
return daemon_version
# We return 0.0.0.0 if daemon version is not specified. In that case,
# use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53.
return FlexibleVersion("2.2.53")
@staticmethod
def _get_rsm_update_state_file():
"""
        This file records whether the last attempted update was an RSM update.
"""
return os.path.join(conf.get_lib_dir(), "rsm_update.json")
def _save_rsm_update_state(self):
"""
        Save an empty RSM state file when we switch to RSM
"""
try:
with open(self._get_rsm_update_state_file(), "w"):
pass
except Exception as e:
logger.warn("Error creating the RSM state ({0}): {1}", self._get_rsm_update_state_file(), ustr(e))
def _remove_rsm_update_state(self):
"""
Remove the rsm state file when we switch to self-update
"""
try:
if os.path.exists(self._get_rsm_update_state_file()):
os.remove(self._get_rsm_update_state_file())
except Exception as e:
logger.warn("Error removing the RSM state ({0}): {1}", self._get_rsm_update_state_file(), ustr(e))
def _get_is_last_update_with_rsm(self):
"""
        Returns True if the state file exists, which indicates that the last update was done with RSM
"""
return os.path.exists(self._get_rsm_update_state_file())
def _get_agent_family_manifest(self, goal_state):
"""
        Get the agent_family from the last goal state for the given family
        Returns: the first entry of the manifest
        Raises an exception if no manifests are found in the last goal state; it is logged only on a new goal state
"""
family = self._ga_family_type
agent_families = goal_state.extensions_goal_state.agent_families
family_found = False
agent_family_manifests = []
for m in agent_families:
if m.name == family:
family_found = True
if len(m.uris) > 0:
agent_family_manifests.append(m)
if not family_found:
raise AgentFamilyMissingError(u"Agent family: {0} not found in the goal state: {1}, skipping agent update \n"
u"[Note: This error is permanent for this goal state and Will not log same error until we receive new goal state]".format(family, self._gs_id))
if len(agent_family_manifests) == 0:
raise AgentFamilyMissingError(
u"No manifest links found for agent family: {0} for goal state: {1}, skipping agent update \n"
u"[Note: This error is permanent for this goal state and will not log same error until we receive new goal state]".format(
family, self._gs_id))
return agent_family_manifests[0]
def run(self, goal_state, ext_gs_updated):
try:
# If auto update is disabled, we don't proceed with update
if not conf.get_auto_update_to_latest_version():
return
# Update the state only on new goal state
if ext_gs_updated:
self._gs_id = goal_state.extensions_goal_state.id
self._updater.sync_new_gs_id(self._gs_id)
agent_family = self._get_agent_family_manifest(goal_state)
            # The updater returns True or False to indicate whether we need to switch updaters
            # If the self-updater receives RSM update enabled, it switches to the RSM updater
            # If the RSM updater receives RSM update disabled, it switches to self-update
            # The updater does not change if the GS was not updated
is_rsm_update_enabled = self._updater.is_rsm_update_enabled(agent_family, ext_gs_updated)
if not is_rsm_update_enabled and isinstance(self._updater, RSMVersionUpdater):
msg = "VM not enabled for RSM updates, switching to self-update mode"
logger.info(msg)
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False)
self._updater = SelfUpdateVersionUpdater(self._gs_id)
self._remove_rsm_update_state()
if is_rsm_update_enabled and isinstance(self._updater, SelfUpdateVersionUpdater):
msg = "VM enabled for RSM updates, switching to RSM update mode"
logger.info(msg)
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False)
self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version)
self._save_rsm_update_state()
            # If the updater was changed in the previous step, we allow the update since it counts as a first attempt. If not, the check below applies
            # (RSM checks for a new goal state; self-update checks the manifest download interval)
if not self._updater.is_update_allowed_this_time(ext_gs_updated):
return
self._updater.retrieve_agent_version(agent_family, goal_state)
if not self._updater.is_retrieved_version_allowed_to_update(agent_family):
return
self._updater.log_new_agent_update_message()
agent = self._updater.download_and_get_new_agent(self._protocol, agent_family, goal_state)
            # The condition below breaks the update loop if the new agent was left in a bad state by previous attempts
            # If the bad agent update has already been attempted 3 times, we don't want to continue with the update anymore
            # Otherwise we allow the update, incrementing the update attempt count and clearing the bad state so the agent can become good
            # [Note: As a result, this breaks the contract between RSM and the agent; we may NOT honor the RSM retries for that version]
if agent.get_update_attempt_count() >= 3:
msg = "Attempted enough update retries for version: {0} but still agent not recovered from bad state. So, we stop updating to this version".format(str(agent.version))
raise AgentUpdateError(msg)
else:
agent.clear_error()
agent.inc_update_attempt_count()
msg = "Agent update attempt count: {0} for version: {1}".format(agent.get_update_attempt_count(), str(agent.version))
logger.info(msg)
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False)
self._updater.purge_extra_agents_from_disk()
self._updater.proceed_with_update()
except Exception as err:
log_error = True
if isinstance(err, AgentUpgradeExitException):
raise err
elif isinstance(err, AgentUpdateError):
error_msg = ustr(err)
elif isinstance(err, AgentFamilyMissingError):
error_msg = ustr(err)
# Agent family missing error is permanent in the given goal state, so we don't want to log it on every iteration of main loop if there is no new goal state
log_error = ext_gs_updated
else:
error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err))
if log_error:
logger.warn(error_msg)
add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False)
self._last_attempted_update_error_msg = error_msg
def get_vmagent_update_status(self):
"""
This function gets the VMAgent update status as per the last attempted update.
        Returns: None if we fail to report, or if an update was never attempted with the RSM version specified in the GS
        Note: We send the status regardless of updater type. Since this is called from the main loop, we want to avoid fetching the agent family
        just to decide whether to send only when the VM is enabled for RSM updates.
"""
try:
if conf.get_enable_ga_versioning():
if not self._last_attempted_update_error_msg:
status = VMAgentUpdateStatuses.Success
code = 0
else:
status = VMAgentUpdateStatuses.Error
code = 1
return VMAgentUpdateStatus(expected_version=str(self._updater.version), status=status, code=code, message=self._last_attempted_update_error_msg)
except Exception as err:
msg = "Unable to report agent update status: {0}".format(textutil.format_exception(err))
logger.warn(msg)
add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=msg, log_event=True)
return None
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/cgroup.py 0000664 0000000 0000000 00000036124 14626177470 0023223 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import errno
import os
import re
from datetime import timedelta
from azurelinuxagent.common import logger, conf
from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import fileutil
_REPORT_EVERY_HOUR = timedelta(hours=1)
_DEFAULT_REPORT_PERIOD = timedelta(seconds=conf.get_cgroup_check_period())
AGENT_NAME_TELEMETRY = "walinuxagent.service" # Name used for telemetry; it needs to be consistent even if the name of the service changes
AGENT_LOG_COLLECTOR = "azure-walinuxagent-logcollector"
class CounterNotFound(Exception):
pass
class MetricValue(object):
"""
Class for defining all the required metric fields to send telemetry.
"""
def __init__(self, category, counter, instance, value, report_period=_DEFAULT_REPORT_PERIOD):
self._category = category
self._counter = counter
self._instance = instance
self._value = value
self._report_period = report_period
@property
def category(self):
return self._category
@property
def counter(self):
return self._counter
@property
def instance(self):
return self._instance
@property
def value(self):
return self._value
@property
def report_period(self):
return self._report_period
class MetricsCategory(object):
MEMORY_CATEGORY = "Memory"
CPU_CATEGORY = "CPU"
class MetricsCounter(object):
PROCESSOR_PERCENT_TIME = "% Processor Time"
TOTAL_MEM_USAGE = "Total Memory Usage"
MAX_MEM_USAGE = "Max Memory Usage"
THROTTLED_TIME = "Throttled Time"
SWAP_MEM_USAGE = "Swap Memory Usage"
AVAILABLE_MEM = "Available MBytes"
USED_MEM = "Used MBytes"
re_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n')
class CGroup(object):
def __init__(self, name, cgroup_path):
"""
        Initialize data collection for this cgroup controller
:param: name: Name of the CGroup
:param: cgroup_path: Path of the controller
:return:
"""
self.name = name
self.path = cgroup_path
def __str__(self):
return "{0} [{1}]".format(self.name, self.path)
def _get_cgroup_file(self, file_name):
return os.path.join(self.path, file_name)
def _get_file_contents(self, file_name):
"""
        Retrieve the contents of a file.
:param str file_name: Name of file within that metric controller
:return: Entire contents of the file
:rtype: str
"""
parameter_file = self._get_cgroup_file(file_name)
return fileutil.read_file(parameter_file)
def _get_parameters(self, parameter_name, first_line_only=False):
"""
Retrieve the values of a parameter from a controller.
        Returns a list of values in the file, or only the first line when requested.
        :param first_line_only: return only the first line.
        :param str parameter_name: Name of file within that metric controller
        :return: The lines of the file, without line terminators (or the first line only)
:rtype: [str]
"""
result = []
try:
values = self._get_file_contents(parameter_name).splitlines()
result = values[0] if first_line_only else values
except IndexError:
parameter_filename = self._get_cgroup_file(parameter_name)
logger.error("File {0} is empty but should not be".format(parameter_filename))
raise CGroupsException("File {0} is empty but should not be".format(parameter_filename))
except Exception as e:
if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: # pylint: disable=E1101
raise e
parameter_filename = self._get_cgroup_file(parameter_name)
raise CGroupsException("Exception while attempting to read {0}".format(parameter_filename), e)
return result
def is_active(self):
try:
tasks = self._get_parameters("tasks")
if tasks:
return len(tasks) != 0
except (IOError, OSError) as e:
if e.errno == errno.ENOENT:
# only suppressing file not found exceptions.
pass
else:
logger.periodic_warn(logger.EVERY_HALF_HOUR,
'Could not get list of tasks from "tasks" file in the cgroup: {0}.'
' Internal error: {1}'.format(self.path, ustr(e)))
except CGroupsException as e:
logger.periodic_warn(logger.EVERY_HALF_HOUR,
'Could not get list of tasks from "tasks" file in the cgroup: {0}.'
' Internal error: {1}'.format(self.path, ustr(e)))
return False
def get_tracked_metrics(self, **_):
"""
Retrieves the current value of the metrics tracked for this cgroup and returns them as an array.
        Note: The agent won't track the metrics, and returns an empty array, if the current CPU ticks are less than the previous value.
"""
raise NotImplementedError()
class CpuCgroup(CGroup):
def __init__(self, name, cgroup_path):
super(CpuCgroup, self).__init__(name, cgroup_path)
self._osutil = get_osutil()
self._previous_cgroup_cpu = None
self._previous_system_cpu = None
self._current_cgroup_cpu = None
self._current_system_cpu = None
self._previous_throttled_time = None
self._current_throttled_time = None
def _get_cpu_ticks(self, allow_no_such_file_or_directory_error=False):
"""
Returns the number of USER_HZ of CPU time (user and system) consumed by this cgroup.
If allow_no_such_file_or_directory_error is set to True and cpuacct.stat does not exist the function
returns 0; this is useful when the function can be called before the cgroup has been created.
"""
try:
cpuacct_stat = self._get_file_contents('cpuacct.stat')
except Exception as e:
if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: # pylint: disable=E1101
raise CGroupsException("Failed to read cpuacct.stat: {0}".format(ustr(e)))
if not allow_no_such_file_or_directory_error:
raise e
cpuacct_stat = None
cpu_ticks = 0
if cpuacct_stat is not None:
#
# Sample file:
# # cat /sys/fs/cgroup/cpuacct/azure.slice/walinuxagent.service/cpuacct.stat
# user 10190
# system 3160
#
match = re_user_system_times.match(cpuacct_stat)
if not match:
raise CGroupsException(
"The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpuacct.stat'), cpuacct_stat))
cpu_ticks = int(match.groups()[0]) + int(match.groups()[1])
return cpu_ticks
def get_throttled_time(self):
try:
with open(os.path.join(self.path, 'cpu.stat')) as cpu_stat:
#
# Sample file:
#
# # cat /sys/fs/cgroup/cpuacct/azure.slice/walinuxagent.service/cpu.stat
# nr_periods 51660
# nr_throttled 19461
# throttled_time 1529590856339
#
for line in cpu_stat:
match = re.match(r'throttled_time\s+(\d+)', line)
if match is not None:
return int(match.groups()[0])
raise Exception("Cannot find throttled_time")
except (IOError, OSError) as e:
if e.errno == errno.ENOENT:
return 0
raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e)))
except Exception as e:
raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e)))
def _cpu_usage_initialized(self):
return self._current_cgroup_cpu is not None and self._current_system_cpu is not None
def initialize_cpu_usage(self):
"""
Sets the initial values of CPU usage. This function must be invoked before calling get_cpu_usage().
"""
if self._cpu_usage_initialized():
raise CGroupsException("initialize_cpu_usage() should be invoked only once")
self._current_cgroup_cpu = self._get_cpu_ticks(allow_no_such_file_or_directory_error=True)
self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot()
self._current_throttled_time = self.get_throttled_time()
def get_cpu_usage(self):
"""
Computes the CPU used by the cgroup since the last call to this function.
The usage is measured as a percentage of utilization of 1 core in the system. For example,
using 1 core all of the time on a 4-core system would be reported as 100%.
NOTE: initialize_cpu_usage() must be invoked before calling get_cpu_usage()
"""
if not self._cpu_usage_initialized():
raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_usage()")
self._previous_cgroup_cpu = self._current_cgroup_cpu
self._previous_system_cpu = self._current_system_cpu
self._current_cgroup_cpu = self._get_cpu_ticks()
self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot()
cgroup_delta = self._current_cgroup_cpu - self._previous_cgroup_cpu
system_delta = max(1, self._current_system_cpu - self._previous_system_cpu)
return round(100.0 * self._osutil.get_processor_cores() * float(cgroup_delta) / float(system_delta), 3)
def get_cpu_throttled_time(self, read_previous_throttled_time=True):
"""
Computes the throttled time (in seconds) since the last call to this function.
NOTE: initialize_cpu_usage() must be invoked before calling this function
        Computes only the current throttled time if read_previous_throttled_time is set to False
"""
if not read_previous_throttled_time:
return float(self.get_throttled_time() / 1E9)
if not self._cpu_usage_initialized():
raise CGroupsException(
"initialize_cpu_usage() must be invoked before the first call to get_throttled_time()")
self._previous_throttled_time = self._current_throttled_time
self._current_throttled_time = self.get_throttled_time()
return float(self._current_throttled_time - self._previous_throttled_time) / 1E9
def get_tracked_metrics(self, **kwargs):
tracked = []
cpu_usage = self.get_cpu_usage()
if cpu_usage >= float(0):
tracked.append(
MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, cpu_usage))
if 'track_throttled_time' in kwargs and kwargs['track_throttled_time']:
throttled_time = self.get_cpu_throttled_time()
if cpu_usage >= float(0) and throttled_time >= float(0):
tracked.append(
MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, throttled_time))
return tracked
class MemoryCgroup(CGroup):
def __init__(self, name, cgroup_path):
super(MemoryCgroup, self).__init__(name, cgroup_path)
self._counter_not_found_error_count = 0
def _get_memory_stat_counter(self, counter_name):
try:
with open(os.path.join(self.path, 'memory.stat')) as memory_stat:
# cat /sys/fs/cgroup/memory/azure.slice/memory.stat
# cache 67178496
# rss 42340352
# rss_huge 6291456
# swap 0
for line in memory_stat:
re_memory_counter = r'{0}\s+(\d+)'.format(counter_name)
match = re.match(re_memory_counter, line)
if match is not None:
return int(match.groups()[0])
except (IOError, OSError) as e:
if e.errno == errno.ENOENT:
raise
raise CGroupsException("Failed to read memory.stat: {0}".format(ustr(e)))
except Exception as e:
raise CGroupsException("Failed to read memory.stat: {0}".format(ustr(e)))
raise CounterNotFound("Cannot find counter: {0}".format(counter_name))
def get_memory_usage(self):
"""
Collect RSS+CACHE from memory.stat cgroup.
:return: Memory usage in bytes
:rtype: int
"""
cache = self._get_memory_stat_counter("cache")
rss = self._get_memory_stat_counter("rss")
return cache + rss
def try_swap_memory_usage(self):
"""
Collect SWAP from memory.stat cgroup.
:return: Memory usage in bytes
:rtype: int
        Note: the stat file is the only place to get the swap value, since the other swap-related file, memory.memsw.usage_in_bytes, reports total Memory+SWAP.
"""
try:
return self._get_memory_stat_counter("swap")
except CounterNotFound as e:
if self._counter_not_found_error_count < 1:
logger.periodic_info(logger.EVERY_HALF_HOUR,
'{0} from "memory.stat" file in the cgroup: {1}---[Note: This log for informational purpose only and can be ignored]'.format(ustr(e), self.path))
self._counter_not_found_error_count += 1
return 0
def get_max_memory_usage(self):
"""
Collect memory.max_usage_in_bytes from the cgroup.
:return: Memory usage in bytes
:rtype: int
"""
usage = 0
try:
usage = int(self._get_parameters('memory.max_usage_in_bytes', first_line_only=True))
except Exception as e:
if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: # pylint: disable=E1101
raise
raise CGroupsException("Exception while attempting to read {0}".format("memory.max_usage_in_bytes"), e)
return usage
def get_tracked_metrics(self, **_):
return [
MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.TOTAL_MEM_USAGE, self.name,
self.get_memory_usage()),
MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.MAX_MEM_USAGE, self.name,
self.get_max_memory_usage(), _REPORT_EVERY_HOUR),
MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name,
self.try_swap_memory_usage(), _REPORT_EVERY_HOUR)
]
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/cgroupapi.py 0000664 0000000 0000000 00000041212 14626177470 0023707 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import os
import re
import shutil
import subprocess
import threading
import uuid
from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.conf import get_agent_pid_file_path
from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \
ExtensionOperationError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import systemd
from azurelinuxagent.common.utils import fileutil, shellutil
from azurelinuxagent.ga.extensionprocessutil import handle_process_completion, read_output, \
TELEMETRY_MESSAGE_MAX_LEN
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import get_distro
CGROUPS_FILE_SYSTEM_ROOT = '/sys/fs/cgroup'
CGROUP_CONTROLLERS = ["cpu", "memory"]
EXTENSION_SLICE_PREFIX = "azure-vmextensions"
class SystemdRunError(CGroupsException):
"""
Raised when systemd-run fails
"""
def __init__(self, msg=None):
super(SystemdRunError, self).__init__(msg)
class CGroupsApi(object):
@staticmethod
def cgroups_supported():
distro_info = get_distro()
distro_name = distro_info[0]
try:
distro_version = FlexibleVersion(distro_info[1])
except ValueError:
return False
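        # Illustrative: Ubuntu 18.04 satisfies the first clause below; CentOS 7.x does
        # not, since the second clause requires a major version of 8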
return (distro_name.lower() == 'ubuntu' and distro_version.major >= 16) or \
(distro_name.lower() in ('centos', 'redhat') and 8 <= distro_version.major < 9)
@staticmethod
def track_cgroups(extension_cgroups):
try:
for cgroup in extension_cgroups:
CGroupsTelemetry.track_cgroup(cgroup)
except Exception as exception:
logger.warn("Cannot add cgroup '{0}' to tracking list; resource usage will not be tracked. "
"Error: {1}".format(cgroup.path, ustr(exception)))
@staticmethod
def get_processes_in_cgroup(cgroup_path):
with open(os.path.join(cgroup_path, "cgroup.procs"), "r") as cgroup_procs:
return [int(pid) for pid in cgroup_procs.read().split()]
@staticmethod
def _foreach_legacy_cgroup(operation):
"""
Previous versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent;
starting from version 2.2.41 we track the agent service in walinuxagent.service instead of WALinuxAgent/WALinuxAgent. Also,
when running under systemd, the PIDs should not be explicitly moved to the cgroup filesystem. The older daemons would
incorrectly do that under certain conditions.
This method checks for the existence of the legacy cgroups and, if the daemon's PID has been added to them, executes the
given operation on the cgroups. After this check, the method attempts to remove the legacy cgroups.
:param operation:
The function to execute on each legacy cgroup. It must take 2 arguments: the controller and the daemon's PID
"""
legacy_cgroups = []
for controller in ['cpu', 'memory']:
cgroup = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, controller, "WALinuxAgent", "WALinuxAgent")
if os.path.exists(cgroup):
logger.info('Found legacy cgroup {0}', cgroup)
legacy_cgroups.append((controller, cgroup))
try:
for controller, cgroup in legacy_cgroups:
procs_file = os.path.join(cgroup, "cgroup.procs")
if os.path.exists(procs_file):
procs_file_contents = fileutil.read_file(procs_file).strip()
daemon_pid = CGroupsApi.get_daemon_pid()
if ustr(daemon_pid) in procs_file_contents:
operation(controller, daemon_pid)
finally:
for _, cgroup in legacy_cgroups:
logger.info('Removing {0}', cgroup)
shutil.rmtree(cgroup, ignore_errors=True)
return len(legacy_cgroups)
@staticmethod
def get_daemon_pid():
return int(fileutil.read_file(get_agent_pid_file_path()).strip())
class SystemdCgroupsApi(CGroupsApi):
"""
Cgroups interface via systemd
"""
def __init__(self):
self._cgroup_mountpoints = None
self._agent_unit_name = None
self._systemd_run_commands = []
self._systemd_run_commands_lock = threading.RLock()
def get_systemd_run_commands(self):
"""
Returns a list of the systemd-run commands currently running (given as PIDs)
"""
with self._systemd_run_commands_lock:
return self._systemd_run_commands[:]
def get_cgroup_mount_points(self):
"""
Returns a tuple with the mount points for the cpu and memory controllers; the values can be None
if the corresponding controller is not mounted
"""
# the output of mount is similar to
# $ mount -t cgroup
# cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd)
# cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct)
# cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory)
# etc
#
if self._cgroup_mountpoints is None:
cpu = None
memory = None
for line in shellutil.run_command(['mount', '-t', 'cgroup']).splitlines():
                match = re.search(r'on\s+(?P<path>/\S+(memory|cpuacct))\s', line)
if match is not None:
path = match.group('path')
if 'cpuacct' in path:
cpu = path
else:
memory = path
self._cgroup_mountpoints = {'cpu': cpu, 'memory': memory}
return self._cgroup_mountpoints['cpu'], self._cgroup_mountpoints['memory']
@staticmethod
def get_process_cgroup_relative_paths(process_id):
"""
Returns a tuple with the path of the cpu and memory cgroups for the given process (relative to the mount point of the corresponding
controller).
The 'process_id' can be a numeric PID or the string "self" for the current process.
The values returned can be None if the process is not in a cgroup for that controller (e.g. the controller is not mounted).
"""
# The contents of the file are similar to
# # cat /proc/1218/cgroup
# 10:memory:/system.slice/walinuxagent.service
# 3:cpu,cpuacct:/system.slice/walinuxagent.service
# etc
cpu_path = None
memory_path = None
for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines():
            match = re.match(r'\d+:(?P<controller>(memory|.*cpuacct.*)):(?P<path>.+)', line)
if match is not None:
controller = match.group('controller')
path = match.group('path').lstrip('/') if match.group('path') != '/' else None
if controller == 'memory':
memory_path = path
else:
cpu_path = path
return cpu_path, memory_path
def get_process_cgroup_paths(self, process_id):
"""
Returns a tuple with the path of the cpu and memory cgroups for the given process. The 'process_id' can be a numeric PID or the string "self" for the current process.
The values returned can be None if the process is not in a cgroup for that controller (e.g. the controller is not mounted).
"""
cpu_cgroup_relative_path, memory_cgroup_relative_path = self.get_process_cgroup_relative_paths(process_id)
cpu_mount_point, memory_mount_point = self.get_cgroup_mount_points()
cpu_cgroup_path = os.path.join(cpu_mount_point, cpu_cgroup_relative_path) \
if cpu_mount_point is not None and cpu_cgroup_relative_path is not None else None
memory_cgroup_path = os.path.join(memory_mount_point, memory_cgroup_relative_path) \
if memory_mount_point is not None and memory_cgroup_relative_path is not None else None
return cpu_cgroup_path, memory_cgroup_path
def get_unit_cgroup_paths(self, unit_name):
"""
Returns a tuple with the path of the cpu and memory cgroups for the given unit.
The values returned can be None if the controller is not mounted.
Ex: ControlGroup=/azure.slice/walinuxagent.service
controlgroup_path[1:] = azure.slice/walinuxagent.service
"""
controlgroup_path = systemd.get_unit_property(unit_name, "ControlGroup")
cpu_mount_point, memory_mount_point = self.get_cgroup_mount_points()
cpu_cgroup_path = os.path.join(cpu_mount_point, controlgroup_path[1:]) \
if cpu_mount_point is not None else None
memory_cgroup_path = os.path.join(memory_mount_point, controlgroup_path[1:]) \
if memory_mount_point is not None else None
return cpu_cgroup_path, memory_cgroup_path
@staticmethod
def get_cgroup2_controllers():
"""
Returns a tuple with the mount point for the cgroups v2 controllers, and the currently mounted controllers;
either value can be None if cgroups v2 or its controllers are not mounted
"""
# the output of mount is similar to
# $ mount -t cgroup2
# cgroup2 on /sys/fs/cgroup/unified type cgroup2 (rw,nosuid,nodev,noexec,relatime,nsdelegate)
#
for line in shellutil.run_command(['mount', '-t', 'cgroup2']).splitlines():
            match = re.search(r'on\s+(?P<path>/\S+)\s', line)
if match is not None:
mount_point = match.group('path')
controllers = None
controllers_file = os.path.join(mount_point, 'cgroup.controllers')
if os.path.exists(controllers_file):
controllers = fileutil.read_file(controllers_file)
return mount_point, controllers
return None, None
@staticmethod
def _is_systemd_failure(scope_name, stderr):
stderr.seek(0)
stderr = ustr(stderr.read(TELEMETRY_MESSAGE_MAX_LEN), encoding='utf-8', errors='backslashreplace')
unit_not_found = "Unit {0} not found.".format(scope_name)
return unit_not_found in stderr or scope_name not in stderr
@staticmethod
def get_extension_slice_name(extension_name, old_slice=False):
        # The old slice makes it difficult for users to override the limits because they need to place drop-in files on every upgrade if the extension slice is different for each version.
        # old slice includes <HandlerName>.<ExtensionName>-<HandlerVersion>
        # new slice without version: <HandlerName>.<ExtensionName>
if not old_slice:
extension_name = extension_name.rsplit("-", 1)[0]
# Since '-' is used as a separator in systemd unit names, we replace it with '_' to prevent side-effects.
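        # Illustrative (hypothetical name): "My-Ext-1.0.0" -> version dropped ("My-Ext"),
        # '-' replaced with '_', yielding "azure-vmextensions-My_Ext.slice"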
return EXTENSION_SLICE_PREFIX + "-" + extension_name.replace('-', '_') + ".slice"
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
error_code=ExtensionErrorCodes.PluginUnknownFailure):
scope = "{0}_{1}".format(cmd_name, uuid.uuid4())
extension_slice_name = self.get_extension_slice_name(extension_name)
with self._systemd_run_commands_lock:
process = subprocess.Popen( # pylint: disable=W1509
# Some distros (e.g. Ubuntu 20.04) enable CPU and memory accounting by default, which would create nested cgroups under the extension slice.
# Disabling CPU and Memory accounting on the scope prevents those nested cgroups from being created, so that all the counters
# are reported in the extension's cgroup (the slice unit file is already configured with accounting enabled).
"systemd-run --property=CPUAccounting=no --property=MemoryAccounting=no --unit={0} --scope --slice={1} {2}".format(scope, extension_slice_name, command),
shell=shell,
cwd=cwd,
stdout=stdout,
stderr=stderr,
env=env,
preexec_fn=os.setsid)
# We start systemd-run with shell == True so process.pid is the shell's pid, not the pid for systemd-run
self._systemd_run_commands.append(process.pid)
scope_name = scope + '.scope'
logger.info("Started extension in unit '{0}'", scope_name)
cpu_cgroup = None
try:
cgroup_relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name)
cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_cgroup_mount_points()
if cpu_cgroup_mountpoint is None:
logger.info("The CPU controller is not mounted; will not track resource usage")
else:
cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path)
cpu_cgroup = CpuCgroup(extension_name, cpu_cgroup_path)
CGroupsTelemetry.track_cgroup(cpu_cgroup)
if memory_cgroup_mountpoint is None:
logger.info("The Memory controller is not mounted; will not track resource usage")
else:
memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path)
memory_cgroup = MemoryCgroup(extension_name, memory_cgroup_path)
CGroupsTelemetry.track_cgroup(memory_cgroup)
except IOError as e:
if e.errno == 2: # 'No such file or directory'
logger.info("The extension command already completed; will not track resource usage")
logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e))
except Exception as e:
logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e))
# Wait for process completion or timeout
try:
return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
stderr=stderr, error_code=error_code, cpu_cgroup=cpu_cgroup)
except ExtensionError as e:
# The extension didn't terminate successfully. Determine whether it was due to systemd errors or
# extension errors.
if not self._is_systemd_failure(scope, stderr):
# There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error
raise
# There was an issue with systemd-run. We need to log it and retry the extension without systemd.
process_output = read_output(stdout, stderr)
# Reset the stdout and stderr
stdout.truncate(0)
stderr.truncate(0)
if isinstance(e, ExtensionOperationError):
# no-member: Instance of 'ExtensionError' has no 'exit_code' member (no-member) - Disabled: e is actually an ExtensionOperationError
err_msg = 'Systemd process exited with code %s and output %s' % (
e.exit_code, process_output) # pylint: disable=no-member
else:
err_msg = "Systemd timed-out, output: %s" % process_output
raise SystemdRunError(err_msg)
finally:
with self._systemd_run_commands_lock:
self._systemd_run_commands.remove(process.pid)
def cleanup_legacy_cgroups(self):
"""
Previous versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent;
starting from version 2.2.41 we track the agent service in walinuxagent.service instead of WALinuxAgent/WALinuxAgent. If
we find that any of the legacy groups include the PID of the daemon then we need to disable data collection for this
instance (under systemd, moving PIDs across the cgroup file system can produce unpredictable results)
"""
return CGroupsApi._foreach_legacy_cgroup(lambda *_: None)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/cgroupconfigurator.py 0000664 0000000 0000000 00000160121 14626177470 0025641 0 ustar 00root root 0000000 0000000 # -*- encoding: utf-8 -*-
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import glob
import json
import os
import re
import subprocess
import threading
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup
from azurelinuxagent.ga.cgroupapi import CGroupsApi, SystemdCgroupsApi, SystemdRunError, EXTENSION_SLICE_PREFIX
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.exception import ExtensionErrorCodes, CGroupsException, AgentMemoryExceededException
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil, systemd
from azurelinuxagent.common.version import get_distro
from azurelinuxagent.common.utils import shellutil, fileutil
from azurelinuxagent.ga.extensionprocessutil import handle_process_completion
from azurelinuxagent.common.event import add_event, WALAEventOperation
AZURE_SLICE = "azure.slice"
_AZURE_SLICE_CONTENTS = """
[Unit]
Description=Slice for Azure VM Agent and Extensions
DefaultDependencies=no
Before=slices.target
"""
_VMEXTENSIONS_SLICE = EXTENSION_SLICE_PREFIX + ".slice"
_AZURE_VMEXTENSIONS_SLICE = AZURE_SLICE + "/" + _VMEXTENSIONS_SLICE
_VMEXTENSIONS_SLICE_CONTENTS = """
[Unit]
Description=Slice for Azure VM Extensions
DefaultDependencies=no
Before=slices.target
[Slice]
CPUAccounting=yes
MemoryAccounting=yes
"""
_EXTENSION_SLICE_CONTENTS = """
[Unit]
Description=Slice for Azure VM extension {extension_name}
DefaultDependencies=no
Before=slices.target
[Slice]
CPUAccounting=yes
CPUQuota={cpu_quota}
MemoryAccounting=yes
"""
LOGCOLLECTOR_SLICE = "azure-walinuxagent-logcollector.slice"
# More info on resource limits properties in systemd here:
# https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/resource_management_guide/sec-modifying_control_groups
_LOGCOLLECTOR_SLICE_CONTENTS_FMT = """
[Unit]
Description=Slice for Azure VM Agent Periodic Log Collector
DefaultDependencies=no
Before=slices.target
[Slice]
CPUAccounting=yes
CPUQuota={cpu_quota}
MemoryAccounting=yes
"""
_LOGCOLLECTOR_CPU_QUOTA = "5%"
LOGCOLLECTOR_MEMORY_LIMIT = 30 * 1024 ** 2  # 30 MiB
_AGENT_DROP_IN_FILE_SLICE = "10-Slice.conf"
_AGENT_DROP_IN_FILE_SLICE_CONTENTS = """
# This drop-in unit file was created by the Azure VM Agent.
# Do not edit.
[Service]
Slice=azure.slice
"""
_DROP_IN_FILE_CPU_ACCOUNTING = "11-CPUAccounting.conf"
_DROP_IN_FILE_CPU_ACCOUNTING_CONTENTS = """
# This drop-in unit file was created by the Azure VM Agent.
# Do not edit.
[Service]
CPUAccounting=yes
"""
_DROP_IN_FILE_CPU_QUOTA = "12-CPUQuota.conf"
_DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT = """
# This drop-in unit file was created by the Azure VM Agent.
# Do not edit.
[Service]
CPUQuota={0}
"""
_DROP_IN_FILE_MEMORY_ACCOUNTING = "13-MemoryAccounting.conf"
_DROP_IN_FILE_MEMORY_ACCOUNTING_CONTENTS = """
# This drop-in unit file was created by the Azure VM Agent.
# Do not edit.
[Service]
MemoryAccounting=yes
"""
class DisableCgroups(object):
ALL = "all"
AGENT = "agent"
EXTENSIONS = "extensions"
def _log_cgroup_info(format_string, *args):
message = format_string.format(*args)
logger.info("[CGI] " + message)
add_event(op=WALAEventOperation.CGroupsInfo, message=message)
def _log_cgroup_warning(format_string, *args):
message = format_string.format(*args)
logger.info("[CGW] " + message) # log as INFO for now, in the future it should be logged as WARNING
add_event(op=WALAEventOperation.CGroupsInfo, message=message, is_success=False, log_event=False)
class CGroupConfigurator(object):
"""
This class implements the high-level operations on CGroups (e.g. initialization, creation, etc)
NOTE: with the exception of start_extension_command, none of the methods in this class
raise exceptions (cgroup operations should not block extensions)
"""
class _Impl(object):
def __init__(self):
self._initialized = False
self._cgroups_supported = False
self._agent_cgroups_enabled = False
self._extensions_cgroups_enabled = False
self._cgroups_api = None
self._agent_cpu_cgroup_path = None
self._agent_memory_cgroup_path = None
self._agent_memory_cgroup = None
self._check_cgroups_lock = threading.RLock() # Protect the check_cgroups which is called from Monitor thread and main loop.
def initialize(self):
try:
if self._initialized:
return
# This check resets the quotas if the distro moves from the cgroup-supported list to the unsupported list later in time.
if not CGroupsApi.cgroups_supported():
agent_drop_in_path = systemd.get_agent_drop_in_path()
try:
if os.path.exists(agent_drop_in_path) and os.path.isdir(agent_drop_in_path):
files_to_cleanup = []
agent_drop_in_file_slice = os.path.join(agent_drop_in_path, _AGENT_DROP_IN_FILE_SLICE)
agent_drop_in_file_cpu_accounting = os.path.join(agent_drop_in_path,
_DROP_IN_FILE_CPU_ACCOUNTING)
agent_drop_in_file_memory_accounting = os.path.join(agent_drop_in_path,
_DROP_IN_FILE_MEMORY_ACCOUNTING)
agent_drop_in_file_cpu_quota = os.path.join(agent_drop_in_path, _DROP_IN_FILE_CPU_QUOTA)
files_to_cleanup.extend([agent_drop_in_file_slice, agent_drop_in_file_cpu_accounting,
agent_drop_in_file_memory_accounting, agent_drop_in_file_cpu_quota])
self.__cleanup_all_files(files_to_cleanup)
self.__reload_systemd_config()
logger.info("Agent reset the quotas if distro: {0} goes from supported to unsupported list", get_distro())
except Exception as err:
logger.warn("Unable to delete Agent drop-in files while resetting the quotas: {0}".format(err))
# check whether cgroup monitoring is supported on the current distro
self._cgroups_supported = CGroupsApi.cgroups_supported()
if not self._cgroups_supported:
logger.info("Cgroup monitoring is not supported on {0}", get_distro())
return
# check that systemd is detected correctly
self._cgroups_api = SystemdCgroupsApi()
if not systemd.is_systemd():
_log_cgroup_warning("systemd was not detected on {0}", get_distro())
return
_log_cgroup_info("systemd version: {0}", systemd.get_version())
# This is temporarily disabled while we analyze telemetry. Likely it will be removed.
# self.__collect_azure_unit_telemetry()
# self.__collect_agent_unit_files_telemetry()
if not self.__check_no_legacy_cgroups():
return
agent_unit_name = systemd.get_agent_unit_name()
agent_slice = systemd.get_unit_property(agent_unit_name, "Slice")
if agent_slice not in (AZURE_SLICE, "system.slice"):
_log_cgroup_warning("The agent is within an unexpected slice: {0}", agent_slice)
return
self.__setup_azure_slice()
cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers()
self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(agent_slice,
cpu_controller_root,
memory_controller_root)
if self._agent_cpu_cgroup_path is not None or self._agent_memory_cgroup_path is not None:
self.enable()
if self._agent_cpu_cgroup_path is not None:
_log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
self.__set_cpu_quota(conf.get_agent_cpu_quota())
CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))
if self._agent_memory_cgroup_path is not None:
_log_cgroup_info("Agent Memory cgroup: {0}", self._agent_memory_cgroup_path)
self._agent_memory_cgroup = MemoryCgroup(AGENT_NAME_TELEMETRY, self._agent_memory_cgroup_path)
CGroupsTelemetry.track_cgroup(self._agent_memory_cgroup)
_log_cgroup_info('Agent cgroups enabled: {0}', self._agent_cgroups_enabled)
except Exception as exception:
_log_cgroup_warning("Error initializing cgroups: {0}", ustr(exception))
finally:
self._initialized = True
@staticmethod
def __collect_azure_unit_telemetry():
azure_units = []
try:
units = shellutil.run_command(['systemctl', 'list-units', 'azure*', '-all'])
for line in units.split('\n'):
match = re.match(r'\s?(azure[^\s]*)\s?', line, re.IGNORECASE)
if match is not None:
azure_units.append((match.group(1), line))
except shellutil.CommandError as command_error:
_log_cgroup_warning("Failed to list systemd units: {0}", ustr(command_error))
for unit_name, unit_description in azure_units:
unit_slice = "Unknown"
try:
unit_slice = systemd.get_unit_property(unit_name, "Slice")
except Exception as exception:
_log_cgroup_warning("Failed to query Slice for {0}: {1}", unit_name, ustr(exception))
_log_cgroup_info("Found an Azure unit under slice {0}: {1}", unit_slice, unit_description)
if len(azure_units) == 0:
try:
cgroups = shellutil.run_command('systemd-cgls')
for line in cgroups.split('\n'):
if re.match(r'[^\x00-\xff]+azure\.slice\s*', line, re.UNICODE):
logger.info(ustr("Found a cgroup for azure.slice\n{0}").format(cgroups))
# Don't add the output of systemd-cgls to the telemetry, since currently it does not support Unicode
add_event(op=WALAEventOperation.CGroupsInfo, message="Found a cgroup for azure.slice")
except shellutil.CommandError as command_error:
_log_cgroup_warning("Failed to list systemd units: {0}", ustr(command_error))
@staticmethod
def __collect_agent_unit_files_telemetry():
agent_unit_files = []
agent_service_name = get_osutil().get_service_name()
try:
fragment_path = systemd.get_unit_property(agent_service_name, "FragmentPath")
if fragment_path != systemd.get_agent_unit_file():
agent_unit_files.append(fragment_path)
except Exception as exception:
_log_cgroup_warning("Failed to query the agent's FragmentPath: {0}", ustr(exception))
try:
drop_in_paths = systemd.get_unit_property(agent_service_name, "DropInPaths")
for path in drop_in_paths.split():
agent_unit_files.append(path)
except Exception as exception:
_log_cgroup_warning("Failed to query the agent's DropInPaths: {0}", ustr(exception))
for unit_file in agent_unit_files:
try:
with open(unit_file, "r") as file_object:
_log_cgroup_info("Found a custom unit file for the agent: {0}\n{1}", unit_file,
file_object.read())
except Exception as exception:
_log_cgroup_warning("Can't read {0}: {1}", unit_file, ustr(exception))
def __check_no_legacy_cgroups(self):
"""
Older versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent. When running
under systemd this could produce invalid resource usage data. Cgroups should not be enabled under this condition.
"""
legacy_cgroups = self._cgroups_api.cleanup_legacy_cgroups()
if legacy_cgroups > 0:
_log_cgroup_warning("The daemon's PID was added to a legacy cgroup; will not monitor resource usage.")
return False
return True
def __get_cgroup_controllers(self):
#
# check v1 controllers
#
cpu_controller_root, memory_controller_root = self._cgroups_api.get_cgroup_mount_points()
if cpu_controller_root is not None:
logger.info("The CPU cgroup controller is mounted at {0}", cpu_controller_root)
else:
_log_cgroup_warning("The CPU cgroup controller is not mounted")
if memory_controller_root is not None:
logger.info("The memory cgroup controller is mounted at {0}", memory_controller_root)
else:
_log_cgroup_warning("The memory cgroup controller is not mounted")
#
# check v2 controllers
#
cgroup2_mount_point, cgroup2_controllers = self._cgroups_api.get_cgroup2_controllers()
if cgroup2_mount_point is not None:
_log_cgroup_info("cgroups v2 mounted at {0}. Controllers: [{1}]", cgroup2_mount_point,
cgroup2_controllers)
return cpu_controller_root, memory_controller_root
@staticmethod
def __setup_azure_slice():
"""
The agent creates "azure.slice" for use by extensions and the agent. The agent runs under "azure.slice" directly and each
extension runs under its own slice ("Microsoft.CPlat.Extension.slice" in the example below). All the slices for
extensions are grouped under "vmextensions.slice".
Example: -.slice
├─user.slice
├─system.slice
└─azure.slice
├─walinuxagent.service
│ ├─5759 /usr/bin/python3 -u /usr/sbin/waagent -daemon
│ └─5764 python3 -u bin/WALinuxAgent-2.2.53-py2.7.egg -run-exthandlers
└─azure-vmextensions.slice
└─Microsoft.CPlat.Extension.slice
└─5894 /usr/bin/python3 /var/lib/waagent/Microsoft.CPlat.Extension-1.0.0.0/enable.py
This method ensures that the "azure" and "vmextensions" slices are created. Setup should create those slices
under /lib/systemd/system; if they do not exist, this method will create them.
It also creates drop-in files to set the agent's Slice and CPUAccounting if they have not been
set up in the agent's unit file.
Lastly, the method also cleans up unit files left over from previous versions of the agent.
"""
# Older agents used to create this slice, but it was never used. Clean up the file.
CGroupConfigurator._Impl.__cleanup_unit_file("/etc/systemd/system/system-walinuxagent.extensions.slice")
unit_file_install_path = systemd.get_unit_file_install_path()
azure_slice = os.path.join(unit_file_install_path, AZURE_SLICE)
vmextensions_slice = os.path.join(unit_file_install_path, _VMEXTENSIONS_SLICE)
logcollector_slice = os.path.join(unit_file_install_path, LOGCOLLECTOR_SLICE)
agent_unit_file = systemd.get_agent_unit_file()
agent_drop_in_path = systemd.get_agent_drop_in_path()
agent_drop_in_file_slice = os.path.join(agent_drop_in_path, _AGENT_DROP_IN_FILE_SLICE)
agent_drop_in_file_cpu_accounting = os.path.join(agent_drop_in_path, _DROP_IN_FILE_CPU_ACCOUNTING)
agent_drop_in_file_memory_accounting = os.path.join(agent_drop_in_path, _DROP_IN_FILE_MEMORY_ACCOUNTING)
files_to_create = []
if not os.path.exists(azure_slice):
files_to_create.append((azure_slice, _AZURE_SLICE_CONTENTS))
if not os.path.exists(vmextensions_slice):
files_to_create.append((vmextensions_slice, _VMEXTENSIONS_SLICE_CONTENTS))
# Update log collector slice contents
slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=_LOGCOLLECTOR_CPU_QUOTA)
files_to_create.append((logcollector_slice, slice_contents))
if fileutil.findre_in_file(agent_unit_file, r"Slice=") is not None:
CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_slice)
else:
if not os.path.exists(agent_drop_in_file_slice):
files_to_create.append((agent_drop_in_file_slice, _AGENT_DROP_IN_FILE_SLICE_CONTENTS))
if fileutil.findre_in_file(agent_unit_file, r"CPUAccounting=") is not None:
CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_cpu_accounting)
else:
if not os.path.exists(agent_drop_in_file_cpu_accounting):
files_to_create.append((agent_drop_in_file_cpu_accounting, _DROP_IN_FILE_CPU_ACCOUNTING_CONTENTS))
if fileutil.findre_in_file(agent_unit_file, r"MemoryAccounting=") is not None:
CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_memory_accounting)
else:
if not os.path.exists(agent_drop_in_file_memory_accounting):
files_to_create.append(
(agent_drop_in_file_memory_accounting, _DROP_IN_FILE_MEMORY_ACCOUNTING_CONTENTS))
if len(files_to_create) > 0:
# create the unit files; if one fails, remove them all and return
try:
for path, contents in files_to_create:
CGroupConfigurator._Impl.__create_unit_file(path, contents)
except Exception as exception:
_log_cgroup_warning("Failed to create unit files for the azure slice: {0}", ustr(exception))
# files_to_create holds (path, contents) tuples, so unpack the path before cleaning up
for unit_file, _ in files_to_create:
CGroupConfigurator._Impl.__cleanup_unit_file(unit_file)
return
CGroupConfigurator._Impl.__reload_systemd_config()
@staticmethod
def __reload_systemd_config():
# reload the systemd configuration; the new slices will be used once the agent's service restarts
try:
logger.info("Executing systemctl daemon-reload...")
shellutil.run_command(["systemctl", "daemon-reload"])
except Exception as exception:
_log_cgroup_warning("daemon-reload failed (create azure slice): {0}", ustr(exception))
@staticmethod
def __create_unit_file(path, contents):
parent, _ = os.path.split(path)
if not os.path.exists(parent):
fileutil.mkdir(parent, mode=0o755)
exists = os.path.exists(path)
fileutil.write_file(path, contents)
_log_cgroup_info("{0} {1}", "Updated" if exists else "Created", path)
@staticmethod
def __cleanup_unit_file(path):
if os.path.exists(path):
try:
os.remove(path)
_log_cgroup_info("Removed {0}", path)
except Exception as exception:
_log_cgroup_warning("Failed to remove {0}: {1}", path, ustr(exception))
@staticmethod
def __cleanup_all_files(files_to_cleanup):
for path in files_to_cleanup:
if os.path.exists(path):
try:
os.remove(path)
_log_cgroup_info("Removed {0}", path)
except Exception as exception:
_log_cgroup_warning("Failed to remove {0}: {1}", path, ustr(exception))
@staticmethod
def __create_all_files(files_to_create):
# create the unit files; if one fails, remove them all and return
try:
for path, contents in files_to_create:
CGroupConfigurator._Impl.__create_unit_file(path, contents)
except Exception as exception:
_log_cgroup_warning("Failed to create unit files : {0}", ustr(exception))
# files_to_create holds (path, contents) tuples, so unpack the path before cleaning up
for unit_file, _ in files_to_create:
CGroupConfigurator._Impl.__cleanup_unit_file(unit_file)
return
def is_extension_resource_limits_setup_completed(self, extension_name, cpu_quota=None):
unit_file_install_path = systemd.get_unit_file_install_path()
old_extension_slice_path = os.path.join(unit_file_install_path, SystemdCgroupsApi.get_extension_slice_name(extension_name, old_slice=True))
# clean up the old slice from the disk
if os.path.exists(old_extension_slice_path):
CGroupConfigurator._Impl.__cleanup_unit_file(old_extension_slice_path)
extension_slice_path = os.path.join(unit_file_install_path,
SystemdCgroupsApi.get_extension_slice_name(extension_name))
cpu_quota = str(
cpu_quota) + "%" if cpu_quota is not None else "" # setting an empty value resets to the default (infinity)
slice_contents = _EXTENSION_SLICE_CONTENTS.format(extension_name=extension_name,
cpu_quota=cpu_quota)
if os.path.exists(extension_slice_path):
with open(extension_slice_path, "r") as file_:
if file_.read() == slice_contents:
return True
return False
def __get_agent_cgroups(self, agent_slice, cpu_controller_root, memory_controller_root):
agent_unit_name = systemd.get_agent_unit_name()
expected_relative_path = os.path.join(agent_slice, agent_unit_name)
cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths(
"self")
if cpu_cgroup_relative_path is None:
_log_cgroup_warning("The agent's process is not within a CPU cgroup")
else:
if cpu_cgroup_relative_path == expected_relative_path:
_log_cgroup_info('CPUAccounting: {0}', systemd.get_unit_property(agent_unit_name, "CPUAccounting"))
_log_cgroup_info('CPUQuota: {0}', systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec"))
else:
_log_cgroup_warning(
"The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]",
cpu_cgroup_relative_path,
expected_relative_path)
cpu_cgroup_relative_path = None # Set the path to None to prevent monitoring
if memory_cgroup_relative_path is None:
_log_cgroup_warning("The agent's process is not within a memory cgroup")
else:
if memory_cgroup_relative_path == expected_relative_path:
memory_accounting = systemd.get_unit_property(agent_unit_name, "MemoryAccounting")
_log_cgroup_info('MemoryAccounting: {0}', memory_accounting)
else:
_log_cgroup_info(
"The Agent is not in the expected memory cgroup; will not enable monitoring. CGroup:[{0}] Expected:[{1}]",
memory_cgroup_relative_path,
expected_relative_path)
memory_cgroup_relative_path = None # Set the path to None to prevent monitoring
if cpu_controller_root is not None and cpu_cgroup_relative_path is not None:
agent_cpu_cgroup_path = os.path.join(cpu_controller_root, cpu_cgroup_relative_path)
else:
agent_cpu_cgroup_path = None
if memory_controller_root is not None and memory_cgroup_relative_path is not None:
agent_memory_cgroup_path = os.path.join(memory_controller_root, memory_cgroup_relative_path)
else:
agent_memory_cgroup_path = None
return agent_cpu_cgroup_path, agent_memory_cgroup_path
def supported(self):
return self._cgroups_supported
def enabled(self):
return self._agent_cgroups_enabled or self._extensions_cgroups_enabled
def agent_enabled(self):
return self._agent_cgroups_enabled
def extensions_enabled(self):
return self._extensions_cgroups_enabled
def enable(self):
if not self.supported():
raise CGroupsException(
"Attempted to enable cgroups, but they are not supported on the current platform")
self._agent_cgroups_enabled = True
self._extensions_cgroups_enabled = True
def disable(self, reason, disable_cgroups):
if disable_cgroups == DisableCgroups.ALL: # disable all
# Reset quotas
self.__reset_agent_cpu_quota()
extension_services = self.get_extension_services_list()
for extension in extension_services:
logger.info("Resetting extension : {0} and it's services: {1} CPUQuota".format(extension, extension_services[extension]))
self.__reset_extension_cpu_quota(extension_name=extension)
self.__reset_extension_services_cpu_quota(extension_services[extension])
self.__reload_systemd_config()
CGroupsTelemetry.reset()
self._agent_cgroups_enabled = False
self._extensions_cgroups_enabled = False
elif disable_cgroups == DisableCgroups.AGENT: # disable agent
self._agent_cgroups_enabled = False
self.__reset_agent_cpu_quota()
CGroupsTelemetry.stop_tracking(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))
message = "[CGW] Disabling resource usage monitoring. Reason: {0}".format(reason)
logger.info(message) # log as INFO for now, in the future it should be logged as WARNING
add_event(op=WALAEventOperation.CGroupsDisabled, message=message, is_success=False, log_event=False)
@staticmethod
def __set_cpu_quota(quota):
"""
Sets the agent's CPU quota to the given percentage (100% == 1 CPU)
NOTE: This is done using a dropin file in the default dropin directory; any local overrides on the VM will take precedence
over this setting.
"""
quota_percentage = "{0}%".format(quota)
_log_cgroup_info("Ensuring the agent's CPUQuota is {0}", quota_percentage)
if CGroupConfigurator._Impl.__try_set_cpu_quota(quota_percentage):
CGroupsTelemetry.set_track_throttled_time(True)
@staticmethod
def __reset_agent_cpu_quota():
"""
Removes any CPUQuota on the agent
NOTE: This resets the quota on the agent's default dropin file; any local overrides on the VM will take precedence
over this setting.
"""
logger.info("Resetting agent's CPUQuota")
if CGroupConfigurator._Impl.__try_set_cpu_quota(''): # setting an empty value resets to the default (infinity)
_log_cgroup_info('CPUQuota: {0}',
systemd.get_unit_property(systemd.get_agent_unit_name(), "CPUQuotaPerSecUSec"))
@staticmethod
def __try_set_cpu_quota(quota):
try:
drop_in_file = os.path.join(systemd.get_agent_drop_in_path(), _DROP_IN_FILE_CPU_QUOTA)
contents = _DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT.format(quota)
if os.path.exists(drop_in_file):
with open(drop_in_file, "r") as file_:
if file_.read() == contents:
return True # no need to update the file; return here to avoid doing a daemon-reload
CGroupConfigurator._Impl.__create_unit_file(drop_in_file, contents)
except Exception as exception:
_log_cgroup_warning('Failed to set CPUQuota: {0}', ustr(exception))
return False
try:
logger.info("Executing systemctl daemon-reload...")
shellutil.run_command(["systemctl", "daemon-reload"])
except Exception as exception:
_log_cgroup_warning("daemon-reload failed (set quota): {0}", ustr(exception))
return False
return True
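# As a concrete sketch, __try_set_cpu_quota("75%") (quota value is hypothetical) would write
# the following rendering of _DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT to 12-CPUQuota.conf
# and then run "systemctl daemon-reload":
#
#   # This drop-in unit file was created by the Azure VM Agent.
#   # Do not edit.
#   [Service]
#   CPUQuota=75%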
def check_cgroups(self, cgroup_metrics):
self._check_cgroups_lock.acquire()
try:
if not self.enabled():
return
errors = []
process_check_success = False
try:
self._check_processes_in_agent_cgroup()
process_check_success = True
except CGroupsException as exception:
errors.append(exception)
quota_check_success = False
try:
if cgroup_metrics:
self._check_agent_throttled_time(cgroup_metrics)
quota_check_success = True
except CGroupsException as exception:
errors.append(exception)
reason = "Check on cgroups failed:\n{0}".format("\n".join([ustr(e) for e in errors]))
if not process_check_success and conf.get_cgroup_disable_on_process_check_failure():
self.disable(reason, DisableCgroups.ALL)
if not quota_check_success and conf.get_cgroup_disable_on_quota_check_failure():
self.disable(reason, DisableCgroups.AGENT)
finally:
self._check_cgroups_lock.release()
def _check_processes_in_agent_cgroup(self):
"""
Verifies that the agent's cgroup includes only the current process, its parent, commands started using shellutil and instances of systemd-run
(those processes correspond, respectively, to the extension handler, the daemon, commands started by the extension handler, and the systemd-run
commands used to start extensions on their own cgroup).
Other processes started by the agent (e.g. extensions) and processes not started by the agent (e.g. services installed by extensions) are reported
as unexpected, since they should belong to their own cgroup.
Raises a CGroupsException if the check fails
"""
unexpected = []
agent_cgroup_proc_names = []
try:
daemon = os.getppid()
extension_handler = os.getpid()
agent_commands = set()
agent_commands.update(shellutil.get_running_commands())
systemd_run_commands = set()
systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
agent_cgroup = CGroupsApi.get_processes_in_cgroup(self._agent_cpu_cgroup_path)
# get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup;
agent_commands.update(shellutil.get_running_commands())
systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
for process in agent_cgroup:
agent_cgroup_proc_names.append(self.__format_process(process))
# Note that the agent uses systemd-run to start extensions; systemd-run belongs to the agent cgroup, though the extensions don't.
if process in (daemon, extension_handler) or process in systemd_run_commands:
continue
# check shell systemd_run process if above process check didn't catch it
if self._check_systemd_run_process(process):
continue
# systemd_run_commands contains the shell that started systemd-run, so we also need to check for the parent
if self._get_parent(process) in systemd_run_commands and self._get_command(
process) == 'systemd-run':
continue
# check if the process is a command started by the agent or a descendant of one of those commands
current = process
while current != 0 and current not in agent_commands:
current = self._get_parent(current)
# Verify whether the process was started by the agent (based on the marker in its environment) or is in a zombie state.
# If so, consider it a valid process in the agent's cgroup.
if current == 0 and not (self.__is_process_descendant_of_the_agent(process) or self.__is_zombie_process(process)):
unexpected.append(self.__format_process(process))
if len(unexpected) >= 5: # collect just a small sample
break
except Exception as exception:
_log_cgroup_warning("Error checking the processes in the agent's cgroup: {0}".format(ustr(exception)))
if len(unexpected) > 0:
self._report_agent_cgroups_procs(agent_cgroup_proc_names, unexpected)
raise CGroupsException("The agent's cgroup includes unexpected processes: {0}".format(unexpected))
@staticmethod
def _get_command(pid):
try:
with open('/proc/{0}/comm'.format(pid), "r") as file_:
comm = file_.read()
if comm and comm[-1] == '\x00': # if null-terminated, remove the null
comm = comm[:-1]
return comm.rstrip()
except Exception:
return "UNKNOWN"
@staticmethod
def __format_process(pid):
"""
Formats the given PID as a string containing the PID and the corresponding command line truncated to 64 chars
"""
try:
cmdline = '/proc/{0}/cmdline'.format(pid)
if os.path.exists(cmdline):
with open(cmdline, "r") as cmdline_file:
return "[PID: {0}] {1:64.64}".format(pid, cmdline_file.read())
except Exception:
pass
return "[PID: {0}] UNKNOWN".format(pid)
@staticmethod
def __is_process_descendant_of_the_agent(pid):
"""
Returns True if the process is a descendant of the agent, determined by looking at the env flag
(AZURE_GUEST_AGENT_PARENT_PROCESS_NAME) that we set when the process starts; otherwise False.
"""
try:
env = '/proc/{0}/environ'.format(pid)
if os.path.exists(env):
with open(env, "r") as env_file:
environ = env_file.read()
if environ and environ[-1] == '\x00':
environ = environ[:-1]
return "{0}={1}".format(shellutil.PARENT_PROCESS_NAME, shellutil.AZURE_GUEST_AGENT) in environ
except Exception:
pass
return False
@staticmethod
def __is_zombie_process(pid):
"""
Returns True if the process is in a zombie state, otherwise False.
Ex: cat /proc/18171/stat
18171 (python3) S 18103 18103 18103 0 -1 4194624 57736 64902 0 3
"""
try:
stat = '/proc/{0}/stat'.format(pid)
if os.path.exists(stat):
with open(stat, "r") as stat_file:
return stat_file.read().split()[2] == 'Z'
except Exception:
pass
return False
@staticmethod
def _check_systemd_run_process(process):
"""
Returns True if the process is a shell systemd-run process started by the agent, otherwise False.
Ex: sh,7345 -c systemd-run --unit=enable_7c5cab19-eb79-4661-95d9-9e5091bd5ae0 --scope --slice=azure-vmextensions-Microsoft.OSTCExtensions.VMAccessForLinux_1.5.11.slice /var/lib/waagent/Microsoft.OSTCExtensions.VMAccessForLinux-1.5.11/processes.sh
"""
try:
process_name = "UNKNOWN"
cmdline = '/proc/{0}/cmdline'.format(process)
if os.path.exists(cmdline):
with open(cmdline, "r") as cmdline_file:
process_name = "{0}".format(cmdline_file.read())
match = re.search(r'systemd-run.*--unit=.*--scope.*--slice=azure-vmextensions.*', process_name)
if match is not None:
return True
except Exception:
pass
return False
@staticmethod
def _report_agent_cgroups_procs(agent_cgroup_proc_names, unexpected):
for proc_name in unexpected:
if 'UNKNOWN' in proc_name:
msg = "Agent includes following processes when UNKNOWN process found: {0}".format("\n".join([ustr(proc) for proc in agent_cgroup_proc_names]))
add_event(op=WALAEventOperation.CGroupsInfo, message=msg)
@staticmethod
def _check_agent_throttled_time(cgroup_metrics):
for metric in cgroup_metrics:
if metric.instance == AGENT_NAME_TELEMETRY and metric.counter == MetricsCounter.THROTTLED_TIME:
if metric.value > conf.get_agent_cpu_throttled_time_threshold():
raise CGroupsException("The agent has been throttled for {0} seconds".format(metric.value))
def check_agent_memory_usage(self):
if self.enabled() and self._agent_memory_cgroup:
metrics = self._agent_memory_cgroup.get_tracked_metrics()
current_usage = 0
for metric in metrics:
if metric.counter == MetricsCounter.TOTAL_MEM_USAGE:
current_usage += metric.value
elif metric.counter == MetricsCounter.SWAP_MEM_USAGE:
current_usage += metric.value
if current_usage > conf.get_agent_memory_quota():
raise AgentMemoryExceededException("The agent memory limit {0} bytes exceeded. The current reported usage is {1} bytes.".format(conf.get_agent_memory_quota(), current_usage))
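# Worked example (values are hypothetical): with an agent memory quota of 30 MiB and tracked
# metrics reporting 25 MiB of TOTAL_MEM_USAGE plus 10 MiB of SWAP_MEM_USAGE, current_usage
# would be 35 MiB (36,700,160 bytes) and AgentMemoryExceededException would be raised.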
@staticmethod
def _get_parent(pid):
"""
Returns the parent of the given process. If the parent cannot be determined returns 0 (which is the PID for the scheduler)
"""
try:
stat = '/proc/{0}/stat'.format(pid)
if os.path.exists(stat):
with open(stat, "r") as stat_file:
return int(stat_file.read().split()[3])
except Exception:
pass
return 0
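# For the /proc/<pid>/stat sample shown in __is_zombie_process above
# ("18171 (python3) S 18103 ..."), split()[3] yields 18103: the fourth field of
# /proc/<pid>/stat is the parent PID.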
def start_tracking_unit_cgroups(self, unit_name):
"""
TODO: Start tracking Memory Cgroups
"""
try:
cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name)
if cpu_cgroup_path is None:
logger.info("The CPU controller is not mounted; will not track resource usage")
else:
CGroupsTelemetry.track_cgroup(CpuCgroup(unit_name, cpu_cgroup_path))
if memory_cgroup_path is None:
logger.info("The Memory controller is not mounted; will not track resource usage")
else:
CGroupsTelemetry.track_cgroup(MemoryCgroup(unit_name, memory_cgroup_path))
except Exception as exception:
logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(exception))
def stop_tracking_unit_cgroups(self, unit_name):
"""
TODO: remove Memory cgroups from tracked list.
"""
try:
cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name)
if cpu_cgroup_path is not None:
CGroupsTelemetry.stop_tracking(CpuCgroup(unit_name, cpu_cgroup_path))
if memory_cgroup_path is not None:
CGroupsTelemetry.stop_tracking(MemoryCgroup(unit_name, memory_cgroup_path))
except Exception as exception:
logger.info("Failed to stop tracking resource usage for the extension service: {0}", ustr(exception))
def stop_tracking_extension_cgroups(self, extension_name):
"""
TODO: remove extension Memory cgroups from tracked list
"""
try:
extension_slice_name = SystemdCgroupsApi.get_extension_slice_name(extension_name)
cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE,
extension_slice_name)
cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self._cgroups_api.get_cgroup_mount_points()
cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path)
memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path)
if cpu_cgroup_path is not None:
CGroupsTelemetry.stop_tracking(CpuCgroup(extension_name, cpu_cgroup_path))
if memory_cgroup_path is not None:
CGroupsTelemetry.stop_tracking(MemoryCgroup(extension_name, memory_cgroup_path))
except Exception as exception:
logger.info("Failed to stop tracking resource usage for the extension service: {0}", ustr(exception))
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
error_code=ExtensionErrorCodes.PluginUnknownFailure):
"""
Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
:param extension_name: The extension executing the command
:param command: The command to invoke
:param cmd_name: The type of the command(enable, install, etc.)
:param timeout: Number of seconds to wait for command completion
:param cwd: The working directory for the command
:param env: The environment to pass to the command's process
:param stdout: File object to redirect stdout to
:param stderr: File object to redirect stderr to
:param error_code: Extension error code to raise in case of error
"""
if self.enabled():
try:
return self._cgroups_api.start_extension_command(extension_name, command, cmd_name, timeout,
shell=shell, cwd=cwd, env=env, stdout=stdout,
stderr=stderr, error_code=error_code)
except SystemdRunError as exception:
reason = 'Failed to start {0} using systemd-run, will try invoking the extension directly. Error: {1}'.format(
extension_name, ustr(exception))
self.disable(reason, DisableCgroups.ALL)
# fall-through and re-invoke the extension
# subprocess-popen-preexec-fn Disabled: code is not multi-threaded
process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) # pylint: disable=W1509
return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code)
def __reset_extension_cpu_quota(self, extension_name):
"""
Removes any CPUQuota on the extension
NOTE: This resets the quota on the extension's slice; any local overrides on the VM will take precedence
over this setting.
"""
if self.enabled():
self.setup_extension_slice(extension_name, cpu_quota=None)
def setup_extension_slice(self, extension_name, cpu_quota):
"""
Each extension runs under its own slice (e.g. "Microsoft.CPlat.Extension.slice"). All the slices for
extensions are grouped under "azure-vmextensions.slice".
This method ensures that the extension slice is created. Setup should create the slice
under /lib/systemd/system if it does not exist.
TODO: set memory quotas
"""
if self.enabled():
unit_file_install_path = systemd.get_unit_file_install_path()
extension_slice_path = os.path.join(unit_file_install_path,
SystemdCgroupsApi.get_extension_slice_name(extension_name))
try:
cpu_quota = str(cpu_quota) + "%" if cpu_quota is not None else "" # setting an empty value resets to the default (infinity)
if cpu_quota == "":
_log_cgroup_info("CPUQuota not set for {0}", extension_name)
else:
_log_cgroup_info("Ensuring the {0}'s CPUQuota is {1}", extension_name, cpu_quota)
slice_contents = _EXTENSION_SLICE_CONTENTS.format(extension_name=extension_name,
cpu_quota=cpu_quota)
CGroupConfigurator._Impl.__create_unit_file(extension_slice_path, slice_contents)
except Exception as exception:
_log_cgroup_warning("Failed to set the extension {0} slice and quotas: {1}", extension_name,
ustr(exception))
CGroupConfigurator._Impl.__cleanup_unit_file(extension_slice_path)
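# For illustration, setup_extension_slice("Microsoft.CPlat.Extension", cpu_quota=5) would
# render _EXTENSION_SLICE_CONTENTS as the unit below and write it to
# azure-vmextensions-Microsoft.CPlat.Extension.slice under the unit install path:
#
#   [Unit]
#   Description=Slice for Azure VM extension Microsoft.CPlat.Extension
#   DefaultDependencies=no
#   Before=slices.target
#   [Slice]
#   CPUAccounting=yes
#   CPUQuota=5%
#   MemoryAccounting=yes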
def remove_extension_slice(self, extension_name):
"""
This method ensures that the extension slice gets removed from /lib/systemd/system if it exists.
It also stops tracking the extension's cgroups, which ensures the /sys/fs/cgroup controller paths get cleaned up.
"""
if self.enabled():
unit_file_install_path = systemd.get_unit_file_install_path()
extension_slice_name = SystemdCgroupsApi.get_extension_slice_name(extension_name)
extension_slice_path = os.path.join(unit_file_install_path, extension_slice_name)
if os.path.exists(extension_slice_path):
self.stop_tracking_extension_cgroups(extension_name)
CGroupConfigurator._Impl.__cleanup_unit_file(extension_slice_path)
def set_extension_services_cpu_memory_quota(self, services_list):
"""
Each extension service has a name, a systemd path, and its quotas.
This method ensures that drop-in files are created under the service's .d folder if quotas are given.
ex: /lib/systemd/system/extension.service.d/11-CPUAccounting.conf
TODO: set memory quotas
"""
if self.enabled() and services_list is not None:
for service in services_list:
service_name = service.get('name', None)
unit_file_path = systemd.get_unit_file_install_path()
if service_name is not None and unit_file_path is not None:
files_to_create = []
drop_in_path = os.path.join(unit_file_path, "{0}.d".format(service_name))
drop_in_file_cpu_accounting = os.path.join(drop_in_path,
_DROP_IN_FILE_CPU_ACCOUNTING)
files_to_create.append((drop_in_file_cpu_accounting, _DROP_IN_FILE_CPU_ACCOUNTING_CONTENTS))
drop_in_file_memory_accounting = os.path.join(drop_in_path,
_DROP_IN_FILE_MEMORY_ACCOUNTING)
files_to_create.append(
(drop_in_file_memory_accounting, _DROP_IN_FILE_MEMORY_ACCOUNTING_CONTENTS))
cpu_quota = service.get('cpuQuotaPercentage', None)
if cpu_quota is not None:
cpu_quota = str(cpu_quota) + "%"
_log_cgroup_info("Ensuring the {0}'s CPUQuota is {1}", service_name, cpu_quota)
drop_in_file_cpu_quota = os.path.join(drop_in_path, _DROP_IN_FILE_CPU_QUOTA)
cpu_quota_contents = _DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT.format(cpu_quota)
files_to_create.append((drop_in_file_cpu_quota, cpu_quota_contents))
self.__create_all_files(files_to_create)
self.__reload_systemd_config()
def __reset_extension_services_cpu_quota(self, services_list):
"""
Removes any CPUQuota on the extension service
NOTE: This resets the quota on the extension service's default dropin file; any local overrides on the VM will take precedence
over this setting.
"""
if self.enabled() and services_list is not None:
service_name = None
try:
for service in services_list:
service_name = service.get('name', None)
unit_file_path = systemd.get_unit_file_install_path()
if service_name is not None and unit_file_path is not None:
files_to_create = []
drop_in_path = os.path.join(unit_file_path, "{0}.d".format(service_name))
cpu_quota = "" # setting an empty value resets to the default (infinity)
drop_in_file_cpu_quota = os.path.join(drop_in_path, _DROP_IN_FILE_CPU_QUOTA)
cpu_quota_contents = _DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT.format(cpu_quota)
if os.path.exists(drop_in_file_cpu_quota):
with open(drop_in_file_cpu_quota, "r") as file_:
if file_.read() == cpu_quota_contents:
return
files_to_create.append((drop_in_file_cpu_quota, cpu_quota_contents))
self.__create_all_files(files_to_create)
except Exception as exception:
_log_cgroup_warning('Failed to reset CPUQuota for {0} : {1}', service_name, ustr(exception))
def remove_extension_services_drop_in_files(self, services_list):
"""
Remove the drop-in files from the service's .d folder for the given services
"""
if services_list is not None:
for service in services_list:
service_name = service.get('name', None)
unit_file_path = systemd.get_unit_file_install_path()
if service_name is not None and unit_file_path is not None:
files_to_cleanup = []
drop_in_path = os.path.join(unit_file_path, "{0}.d".format(service_name))
drop_in_file_cpu_accounting = os.path.join(drop_in_path,
_DROP_IN_FILE_CPU_ACCOUNTING)
files_to_cleanup.append(drop_in_file_cpu_accounting)
drop_in_file_memory_accounting = os.path.join(drop_in_path,
_DROP_IN_FILE_MEMORY_ACCOUNTING)
files_to_cleanup.append(drop_in_file_memory_accounting)
cpu_quota = service.get('cpuQuotaPercentage', None)
if cpu_quota is not None:
drop_in_file_cpu_quota = os.path.join(drop_in_path, _DROP_IN_FILE_CPU_QUOTA)
files_to_cleanup.append(drop_in_file_cpu_quota)
CGroupConfigurator._Impl.__cleanup_all_files(files_to_cleanup)
_log_cgroup_info("Drop in files removed for {0}".format(service_name))
def stop_tracking_extension_services_cgroups(self, services_list):
"""
Remove the cgroup entry from the tracked groups to stop tracking.
"""
if self.enabled() and services_list is not None:
for service in services_list:
service_name = service.get('name', None)
if service_name is not None:
self.stop_tracking_unit_cgroups(service_name)
def start_tracking_extension_services_cgroups(self, services_list):
"""
Add the cgroup entry to start tracking the services cgroups.
"""
if self.enabled() and services_list is not None:
for service in services_list:
service_name = service.get('name', None)
if service_name is not None:
self.start_tracking_unit_cgroups(service_name)
@staticmethod
def get_extension_services_list():
"""
ResourceLimits for extensions come from each extension's <extension_dir>/HandlerManifest.json file.
Use this pattern to determine all the installed extension HandlerManifest files and
read the extension services if ResourceLimits are present.
"""
extensions_services = {}
for manifest_path in glob.iglob(os.path.join(conf.get_lib_dir(), "*/HandlerManifest.json")):
match = re.search("(?P[\\w+\\.-]+).HandlerManifest\\.json", manifest_path)
if match is not None:
extensions_name = match.group('extname')
if not extensions_name.startswith('WALinuxAgent'):
try:
data = json.loads(fileutil.read_file(manifest_path))
resource_limits = data[0].get('resourceLimits', None)
services = resource_limits.get('services') if resource_limits else None
extensions_services[extensions_name] = services
except (IOError, OSError) as e:
_log_cgroup_warning(
'Failed to load manifest file ({0}): {1}'.format(manifest_path, e.strerror))
except ValueError:
_log_cgroup_warning('Malformed manifest file ({0}).'.format(manifest_path))
return extensions_services
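# A hypothetical HandlerManifest.json fragment that this method would pick up; the field
# names match the lookups above and in set_extension_services_cpu_memory_quota, and the
# rest of the manifest is omitted:
#
#   [{
#       "resourceLimits": {
#           "services": [
#               {"name": "extension.service", "cpuQuotaPercentage": 5}
#           ]
#       }
#   }]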
# unique instance for the singleton
_instance = None
@staticmethod
def get_instance():
if CGroupConfigurator._instance is None:
CGroupConfigurator._instance = CGroupConfigurator._Impl()
return CGroupConfigurator._instance
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/cgroupstelemetry.py 0000664 0000000 0000000 00000007520 14626177470 0025337 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import errno
import threading
from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import CpuCgroup
from azurelinuxagent.common.future import ustr
class CGroupsTelemetry(object):
"""
"""
_tracked = {}
_track_throttled_time = False
_rlock = threading.RLock()
@staticmethod
def set_track_throttled_time(value):
CGroupsTelemetry._track_throttled_time = value
@staticmethod
def get_track_throttled_time():
return CGroupsTelemetry._track_throttled_time
@staticmethod
def track_cgroup(cgroup):
"""
Adds the given item to the dictionary of tracked cgroups
"""
if isinstance(cgroup, CpuCgroup):
# set the current cpu usage
cgroup.initialize_cpu_usage()
with CGroupsTelemetry._rlock:
if not CGroupsTelemetry.is_tracked(cgroup.path):
CGroupsTelemetry._tracked[cgroup.path] = cgroup
logger.info("Started tracking cgroup {0}", cgroup)
@staticmethod
def is_tracked(path):
"""
Returns true if the given item is in the list of tracked items
O(1) operation.
"""
with CGroupsTelemetry._rlock:
if path in CGroupsTelemetry._tracked:
return True
return False
@staticmethod
def stop_tracking(cgroup):
"""
Stop tracking the cgroups for the given path
"""
with CGroupsTelemetry._rlock:
if cgroup.path in CGroupsTelemetry._tracked:
CGroupsTelemetry._tracked.pop(cgroup.path)
logger.info("Stopped tracking cgroup {0}", cgroup)
@staticmethod
def poll_all_tracked():
metrics = []
inactive_cgroups = []
with CGroupsTelemetry._rlock:
for cgroup in CGroupsTelemetry._tracked.values():
try:
metrics.extend(cgroup.get_tracked_metrics(track_throttled_time=CGroupsTelemetry._track_throttled_time))
except Exception as e:
# There can be scenarios when the CGroup has been deleted by the time we are fetching the values
# from it. This would raise IOError with file entry not found (ERRNO: 2). We do not want to log
# every occurrence of such a case as it would be very verbose. We do want to log all the other
# exceptions that could occur, which is why we log those errors periodically.
if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: # pylint: disable=E1101
logger.periodic_warn(logger.EVERY_HOUR, '[PERIODIC] Could not collect metrics for cgroup '
'{0}. Error : {1}'.format(cgroup.name, ustr(e)))
if not cgroup.is_active():
inactive_cgroups.append(cgroup)
for inactive_cgroup in inactive_cgroups:
CGroupsTelemetry.stop_tracking(inactive_cgroup)
return metrics
@staticmethod
def reset():
with CGroupsTelemetry._rlock:
CGroupsTelemetry._tracked.clear() # emptying the dictionary
CGroupsTelemetry._track_throttled_time = False
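# A minimal usage sketch (the cgroup path is illustrative and assumes a mounted cpu controller):
#
#   cgroup = CpuCgroup("walinuxagent", "/sys/fs/cgroup/cpu,cpuacct/azure.slice/walinuxagent.service")
#   CGroupsTelemetry.track_cgroup(cgroup)          # initializes cpu usage and starts tracking
#   metrics = CGroupsTelemetry.poll_all_tracked()  # collects metrics, dropping inactive cgroups
#   CGroupsTelemetry.reset()                       # stops tracking everything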
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/collect_logs.py 0000664 0000000 0000000 00000033734 14626177470 0024401 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import datetime
import os
import sys
import threading
import time
from azurelinuxagent.ga import logcollector, cgroupconfigurator
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import MetricsCounter
from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation, report_metric
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.interfaces import ThreadHandlerInterface
from azurelinuxagent.ga.logcollector import COMPRESSED_ARCHIVE_PATH, GRACEFUL_KILL_ERRCODE
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, LOGCOLLECTOR_MEMORY_LIMIT
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.utils.shellutil import CommandError
from azurelinuxagent.common.version import PY_VERSION_MAJOR, PY_VERSION_MINOR, AGENT_NAME, CURRENT_VERSION
_INITIAL_LOG_COLLECTION_DELAY = 5 * 60 # Five minutes of delay
def get_collect_logs_handler():
return CollectLogsHandler()
def is_log_collection_allowed():
# There are three conditions that need to be met in order to allow periodic log collection:
# 1) It should be enabled in the configuration.
# 2) The system must be using cgroups to manage services. Needed for resource limiting of the log collection.
# 3) The python version must be at least 2.6 in order to support the ZipFile library used when collecting.
conf_enabled = conf.get_collect_logs()
cgroups_enabled = CGroupConfigurator.get_instance().enabled()
supported_python = PY_VERSION_MINOR >= 6 if PY_VERSION_MAJOR == 2 else PY_VERSION_MAJOR == 3
is_allowed = conf_enabled and cgroups_enabled and supported_python
msg = "Checking if log collection is allowed at this time [{0}]. All three conditions must be met: " \
"configuration enabled [{1}], cgroups enabled [{2}], python supported: [{3}]".format(is_allowed,
conf_enabled,
cgroups_enabled,
supported_python)
logger.info(msg)
add_event(
name=AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.LogCollection,
is_success=is_allowed,
message=msg,
log_event=False)
return is_allowed
class CollectLogsHandler(ThreadHandlerInterface):
"""
Periodically collects and uploads logs from the VM to the host.
"""
_THREAD_NAME = "CollectLogsHandler"
__CGROUPS_FLAG_ENV_VARIABLE = "_AZURE_GUEST_AGENT_LOG_COLLECTOR_MONITOR_CGROUPS_"
@staticmethod
def get_thread_name():
return CollectLogsHandler._THREAD_NAME
@staticmethod
def enable_monitor_cgroups_check():
os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] = "1"
@staticmethod
def disable_monitor_cgroups_check():
if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ:
del os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE]
@staticmethod
def is_enabled_monitor_cgroups_check():
if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ:
return os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] == "1"
return False
def __init__(self):
self.protocol = None
self.protocol_util = None
self.event_thread = None
self.should_run = True
self.last_state = None
self.period = conf.get_collect_logs_period()
def run(self):
self.start()
def keep_alive(self):
return self.should_run
def is_alive(self):
return self.event_thread.is_alive()
def start(self):
self.event_thread = threading.Thread(target=self.daemon)
self.event_thread.setDaemon(True)
self.event_thread.setName(self.get_thread_name())
self.event_thread.start()
def join(self):
self.event_thread.join()
def stopped(self):
return not self.should_run
def stop(self):
self.should_run = False
if self.is_alive():
try:
self.join()
except RuntimeError:
pass
def init_protocols(self):
# The initialization of ProtocolUtil for the log collection thread should be done within the thread itself
# rather than initializing it in the ExtHandler thread. This is done to avoid any concurrency issues as each
# thread would now have its own ProtocolUtil object as per the SingletonPerThread model.
self.protocol_util = get_protocol_util()
self.protocol = self.protocol_util.get_protocol()
def daemon(self):
# Delay the first collection on start-up to give short-lived VMs (that might be dead before the second
# collection has a chance to run) an opportunity to produce meaningful logs to collect.
time.sleep(_INITIAL_LOG_COLLECTION_DELAY)
try:
CollectLogsHandler.enable_monitor_cgroups_check()
if self.protocol_util is None or self.protocol is None:
self.init_protocols()
while not self.stopped():
try:
self.collect_and_send_logs()
except Exception as e:
logger.error("An error occurred in the log collection thread main loop; "
"will skip the current iteration.\n{0}", ustr(e))
finally:
time.sleep(self.period)
except Exception as e:
logger.error("An error occurred in the log collection thread; will exit the thread.\n{0}", ustr(e))
finally:
CollectLogsHandler.disable_monitor_cgroups_check()
def collect_and_send_logs(self):
if self._collect_logs():
self._send_logs()
def _collect_logs(self):
logger.info("Starting log collection...")
# Invoke the command line tool in the agent to collect logs, with resource limits on CPU.
# Some distros (e.g. Ubuntu 20.04) enable CPU and memory accounting by default, which would create nested cgroups under the logcollector slice.
# Disabling CPU and Memory accounting on the scope prevents those nested cgroups, so that all the counters are reported in the logcollector cgroup.
systemd_cmd = [
"systemd-run", "--property=CPUAccounting=no", "--property=MemoryAccounting=no",
"--unit={0}".format(logcollector.CGROUPS_UNIT),
"--slice={0}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE), "--scope"
]
# The log tool is invoked from the current agent's egg with the -collect-logs command line option
collect_logs_cmd = [sys.executable, "-u", sys.argv[0], "-collect-logs"]
final_command = systemd_cmd + collect_logs_cmd
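# Illustrative final command (the unit/slice names below are placeholders; the real values come from the
# logcollector.CGROUPS_UNIT and cgroupconfigurator.LOGCOLLECTOR_SLICE constants used above):
#   systemd-run --property=CPUAccounting=no --property=MemoryAccounting=no \
#       --unit=collect-logs --slice=azure-walinuxagent-logcollector.slice --scope \
#       /usr/bin/python3 -u /usr/sbin/waagent -collect-logs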
def exec_command():
start_time = datetime.datetime.utcnow()
success = False
msg = None
try:
shellutil.run_command(final_command, log_error=False)
duration = elapsed_milliseconds(start_time)
archive_size = os.path.getsize(COMPRESSED_ARCHIVE_PATH)
msg = "Successfully collected logs. Archive size: {0} b, elapsed time: {1} ms.".format(archive_size,
duration)
logger.info(msg)
success = True
return True
except Exception as e:
duration = elapsed_milliseconds(start_time)
err_msg = ustr(e)
if isinstance(e, CommandError):
# pylint has limited (i.e. no) awareness of control flow w.r.t. typing. We disable=no-member
# here because we know e must be a CommandError but pylint still considers the case where
# e is a different type of exception.
err_msg = ustr("Log Collector exited with code {0}").format(
e.returncode) # pylint: disable=no-member
if e.returncode == logcollector.INVALID_CGROUPS_ERRCODE: # pylint: disable=no-member
logger.info("Disabling periodic log collection until service restart due to process error.")
self.stop()
# When the log collector exceeds its memory limit, it gracefully exits the process with this error code.
# Stop the periodic operation because the condition is likely to be persistent.
elif e.returncode == logcollector.GRACEFUL_KILL_ERRCODE: # pylint: disable=no-member
logger.info("Disabling periodic log collection until service restart due to exceeded process memory limit.")
self.stop()
else:
logger.info(err_msg)
msg = "Failed to collect logs. Elapsed time: {0} ms. Error: {1}".format(duration, err_msg)
# No need to log to the local log since we logged stdout, stderr from the process.
return False
finally:
add_event(
name=AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.LogCollection,
is_success=success,
message=msg,
log_event=False)
return exec_command()
def _send_logs(self):
msg = None
success = False
try:
with open(COMPRESSED_ARCHIVE_PATH, "rb") as fh:
archive_content = fh.read()
self.protocol.upload_logs(archive_content)
msg = "Successfully uploaded logs."
logger.info(msg)
success = True
except Exception as e:
msg = "Failed to upload logs. Error: {0}".format(ustr(e))
logger.warn(msg)
finally:
add_event(
name=AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.LogCollection,
is_success=success,
message=msg,
log_event=False)
def get_log_collector_monitor_handler(cgroups):
return LogCollectorMonitorHandler(cgroups)
class LogCollectorMonitorHandler(ThreadHandlerInterface):
"""
Periodically monitors the log collector cgroups and sends telemetry to Kusto.
"""
_THREAD_NAME = "LogCollectorMonitorHandler"
@staticmethod
def get_thread_name():
return LogCollectorMonitorHandler._THREAD_NAME
def __init__(self, cgroups):
self.event_thread = None
self.should_run = True
self.period = 2 # Log collector monitor runs every 2 secs.
self.cgroups = cgroups
self.__log_metrics = conf.get_cgroup_log_metrics()
def run(self):
self.start()
def stop(self):
self.should_run = False
if self.is_alive():
self.join()
def join(self):
self.event_thread.join()
def stopped(self):
return not self.should_run
def is_alive(self):
return self.event_thread is not None and self.event_thread.is_alive()
def start(self):
self.event_thread = threading.Thread(target=self.daemon)
self.event_thread.setDaemon(True)
self.event_thread.setName(self.get_thread_name())
self.event_thread.start()
def daemon(self):
try:
while not self.stopped():
try:
metrics = self._poll_resource_usage()
self._send_telemetry(metrics)
self._verify_memory_limit(metrics)
except Exception as e:
logger.error("An error occurred in the log collection monitor thread loop; "
"will skip the current iteration.\n{0}", ustr(e))
finally:
time.sleep(self.period)
except Exception as e:
logger.error(
"An error occurred in the MonitorLogCollectorCgroupsHandler thread; will exit the thread.\n{0}",
ustr(e))
def _poll_resource_usage(self):
metrics = []
for cgroup in self.cgroups:
metrics.extend(cgroup.get_tracked_metrics(track_throttled_time=True))
return metrics
def _send_telemetry(self, metrics):
for metric in metrics:
report_metric(metric.category, metric.counter, metric.instance, metric.value, log_event=self.__log_metrics)
def _verify_memory_limit(self, metrics):
current_usage = 0
for metric in metrics:
if metric.counter == MetricsCounter.TOTAL_MEM_USAGE:
current_usage += metric.value
elif metric.counter == MetricsCounter.SWAP_MEM_USAGE:
current_usage += metric.value
if current_usage > LOGCOLLECTOR_MEMORY_LIMIT:
msg = "Log collector memory limit {0} bytes exceeded. The max reported usage is {1} bytes.".format(LOGCOLLECTOR_MEMORY_LIMIT, current_usage)
logger.info(msg)
add_event(
name=AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.LogCollection,
message=msg)
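# os._exit() terminates the process immediately, without running 'finally' blocks or cleanup handlers.
# The log collector process exits with GRACEFUL_KILL_ERRCODE so that the parent agent (see the returncode
# handling in CollectLogsHandler._collect_logs above) can detect the memory-limit kill and disable
# periodic log collection.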
os._exit(GRACEFUL_KILL_ERRCODE)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/collect_telemetry_events.py 0000664 0000000 0000000 00000070064 14626177470 0027030 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import datetime
import json
import os
import re
import threading
from collections import defaultdict
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common import conf
from azurelinuxagent.common.agent_supported_feature import get_supported_feature_by_name, SupportedFeatureNames
from azurelinuxagent.common.event import EVENTS_DIRECTORY, TELEMETRY_LOG_EVENT_ID, \
TELEMETRY_LOG_PROVIDER_ID, add_event, WALAEventOperation, add_log_event, get_event_logger, \
CollectOrReportEventDebugInfo, EVENT_FILE_REGEX, parse_event
from azurelinuxagent.common.exception import InvalidExtensionEventError, ServiceStoppedError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.interfaces import ThreadHandlerInterface
from azurelinuxagent.common.telemetryevent import TelemetryEvent, TelemetryEventParam, \
GuestAgentGenericLogsSchema, GuestAgentExtensionEventsSchema
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.ga.exthandlers import HANDLER_NAME_PATTERN
from azurelinuxagent.ga.periodic_operation import PeriodicOperation
def get_collect_telemetry_events_handler(send_telemetry_events_handler):
return CollectTelemetryEventsHandler(send_telemetry_events_handler)
class ExtensionEventSchema(object):
"""
Class for defining the schema for Extension Events.
Sample Extension Event Example:
{
"Version":"1.0.0.23",
"Timestamp":"2018-01-02T22:08:12.510696Z" //(time in UTC (ISO-8601 standard),
"TaskName":"TestRun" //Open for publishers,
"EventLevel":"Critical/Error/Warning/Verbose/Informational/LogAlways",
"Message": "Successful test" //(max 3K, 3072 characters),
"EventPid":"1",
"EventTid":"2",
"OperationId":"Guid (str)"
}
From version 2.10+ onwards we accept integer values for the EventPid and EventTid fields, but we still support the string type for backward compatibility.
"""
Version = "Version"
Timestamp = "Timestamp"
TaskName = "TaskName"
EventLevel = "EventLevel"
Message = "Message"
EventPid = "EventPid"
EventTid = "EventTid"
OperationId = "OperationId"
class _ProcessExtensionEvents(PeriodicOperation):
"""
Periodic operation for collecting extension telemetry events and enqueueing them for the SendTelemetryHandler thread.
"""
_EXTENSION_EVENT_COLLECTION_PERIOD = datetime.timedelta(seconds=conf.get_etp_collection_period())
_EXTENSION_EVENT_FILE_NAME_REGEX = re.compile(r"^(\d+)\.json$", re.IGNORECASE)
# Limits
_MAX_NUMBER_OF_EVENTS_PER_EXTENSION_PER_PERIOD = 360
_EXTENSION_EVENT_FILE_MAX_SIZE = 4 * 1024 * 1024 # 4 MB = 4 * 1,048,576 Bytes
_EXTENSION_EVENT_MAX_SIZE = 1024 * 6 # 6 KB (6144 characters). Limit for the whole event; prevents oversized events.
_EXTENSION_EVENT_MAX_MSG_LEN = 1024 * 3 # 3 KB (3072 characters).
_EXTENSION_EVENT_REQUIRED_FIELDS = [attr.lower() for attr in dir(ExtensionEventSchema) if
not callable(getattr(ExtensionEventSchema, attr)) and not attr.startswith("__")]
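# The comprehension above yields the lower-cased names of all public attributes of ExtensionEventSchema
# (e.g. "version", "timestamp", "taskname", ...), i.e. the canonical set of required event fields.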
def __init__(self, send_telemetry_events_handler):
super(_ProcessExtensionEvents, self).__init__(_ProcessExtensionEvents._EXTENSION_EVENT_COLLECTION_PERIOD)
self._send_telemetry_events_handler = send_telemetry_events_handler
def _operation(self):
if self._send_telemetry_events_handler.stopped():
logger.warn("{0} service is not running, skipping current iteration".format(
self._send_telemetry_events_handler.get_thread_name()))
return
delete_all_event_files = True
extension_handler_with_event_dirs = []
try:
extension_handler_with_event_dirs = self._get_extension_events_dir_with_handler_name(conf.get_ext_log_dir())
if not extension_handler_with_event_dirs:
logger.verbose("No Extension events directory exist")
return
for extension_handler_with_event_dir in extension_handler_with_event_dirs:
handler_name = extension_handler_with_event_dir[0]
handler_event_dir_path = extension_handler_with_event_dir[1]
self._capture_extension_events(handler_name, handler_event_dir_path)
except ServiceStoppedError:
# Since the service stopped, we should not delete the extension files and retry sending them whenever
# the telemetry service comes back up
delete_all_event_files = False
except Exception as error:
msg = "Unknown error occurred when trying to collect extension events:{0}".format(
textutil.format_exception(error))
add_event(op=WALAEventOperation.ExtensionTelemetryEventProcessing, message=msg, is_success=False)
finally:
# Always ensure that the events directories are deleted each run (except when the telemetry service is
# stopped), even if we run into an error and don't process them this run.
if delete_all_event_files:
self._ensure_all_events_directories_empty(extension_handler_with_event_dirs)
@staticmethod
def _get_extension_events_dir_with_handler_name(extension_log_dir):
"""
Get the full path to events directory for all extension handlers that have one
:param extension_log_dir: Base log directory for all extensions
:return: A list of full paths of existing events directory for all handlers
"""
extension_handler_with_event_dirs = []
for ext_handler_name in os.listdir(extension_log_dir):
# Check if it's an extension directory
if not os.path.isdir(os.path.join(extension_log_dir, ext_handler_name)) \
or re.match(HANDLER_NAME_PATTERN, ext_handler_name) is None:
continue
# Check if EVENTS_DIRECTORY directory exists
extension_event_dir = os.path.join(extension_log_dir, ext_handler_name, EVENTS_DIRECTORY)
if os.path.exists(extension_event_dir):
extension_handler_with_event_dirs.append((ext_handler_name, extension_event_dir))
return extension_handler_with_event_dirs
def _event_file_size_allowed(self, event_file_path):
event_file_size = os.stat(event_file_path).st_size
if event_file_size > self._EXTENSION_EVENT_FILE_MAX_SIZE:
convert_to_mb = lambda x: (1.0 * x) / (1000 * 1000)
msg = "Skipping file: {0} as its size is {1:.2f} Mb > Max size allowed {2:.1f} Mb".format(
event_file_path, convert_to_mb(event_file_size),
convert_to_mb(self._EXTENSION_EVENT_FILE_MAX_SIZE))
logger.warn(msg)
add_log_event(level=logger.LogLevel.WARNING, message=msg, forced=True)
return False
return True
def _capture_extension_events(self, handler_name, handler_event_dir_path):
"""
Capture extension events and enqueue them on the telemetry events handler
:param handler_name: Complete Handler Name. Eg: Microsoft.CPlat.Core.RunCommandLinux
:param handler_event_dir_path: Full path. Eg: '/var/log/azure/Microsoft.CPlat.Core.RunCommandLinux/events'
"""
# Filter out the files that do not follow the pre-defined EXTENSION_EVENT_FILE_NAME_REGEX
event_files = [event_file for event_file in os.listdir(handler_event_dir_path) if
re.match(self._EXTENSION_EVENT_FILE_NAME_REGEX, event_file) is not None]
# Pick the latest files first; we'll discard older events if len(events) > MAX_EVENT_COUNT
event_files.sort(reverse=True)
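# Note: the matched file names are purely numeric, so a reverse lexicographic sort puts the newest files
# first as long as the names have the same number of digits (which is the case for the timestamp-based
# names extensions are expected to produce).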
captured_extension_events_count = 0
dropped_events_with_error_count = defaultdict(int)
try:
for event_file in event_files:
event_file_path = os.path.join(handler_event_dir_path, event_file)
try:
logger.verbose("Processing event file: {0}", event_file_path)
if not self._event_file_size_allowed(event_file_path):
continue
# We support multiple events in a file, read the file and parse events.
captured_extension_events_count = self._enqueue_events_and_get_count(handler_name, event_file_path,
captured_extension_events_count,
dropped_events_with_error_count)
# We only allow a maximum of _MAX_NUMBER_OF_EVENTS_PER_EXTENSION_PER_PERIOD (360) events per period per handler
if captured_extension_events_count >= self._MAX_NUMBER_OF_EVENTS_PER_EXTENSION_PER_PERIOD:
msg = "Reached max count for the extension: {0}; Max Limit: {1}. Skipping the rest.".format(
handler_name, self._MAX_NUMBER_OF_EVENTS_PER_EXTENSION_PER_PERIOD)
logger.warn(msg)
add_log_event(level=logger.LogLevel.WARNING, message=msg, forced=True)
break
except ServiceStoppedError:
# Not logging here as already logged once, re-raising
# Since we already started processing this file, deleting it as we could've already sent some events out
# This is a trade-off between data replication vs data loss.
raise
except Exception as error:
msg = "Failed to process event file {0}:{1}".format(event_file,
textutil.format_exception(error))
logger.warn(msg)
add_log_event(level=logger.LogLevel.WARNING, message=msg, forced=True)
finally:
# Todo: We should delete files after ensuring that we sent the data to Wireserver successfully
# from our end rather than deleting first and sending later. This is to ensure the data reliability
# of the agent telemetry pipeline.
os.remove(event_file_path)
finally:
if dropped_events_with_error_count:
msg = "Dropped events for Extension: {0}; Details:\n\t{1}".format(handler_name, '\n\t'.join(
["Reason: {0}; Dropped Count: {1}".format(k, v) for k, v in dropped_events_with_error_count.items()]))
logger.warn(msg)
add_log_event(level=logger.LogLevel.WARNING, message=msg, forced=True)
if captured_extension_events_count > 0:
logger.info("Collected {0} events for extension: {1}".format(captured_extension_events_count, handler_name))
@staticmethod
def _ensure_all_events_directories_empty(extension_events_directories):
if not extension_events_directories:
return
for extension_handler_with_event_dir in extension_events_directories:
event_dir_path = extension_handler_with_event_dir[1]
if not os.path.exists(event_dir_path):
continue
log_err = True
# Delete any residue files in the events directory
for residue_file in os.listdir(event_dir_path):
try:
os.remove(os.path.join(event_dir_path, residue_file))
except Exception as error:
# Only log the first error once per handler per run to keep the logfile clean
if log_err:
logger.error("Failed to completely clear the {0} directory. Exception: {1}", event_dir_path,
ustr(error))
log_err = False
def _enqueue_events_and_get_count(self, handler_name, event_file_path, captured_events_count,
dropped_events_with_error_count):
event_file_time = datetime.datetime.fromtimestamp(os.path.getmtime(event_file_path))
# Read event file and decode it properly
with open(event_file_path, "rb") as event_file_descriptor:
event_data = event_file_descriptor.read().decode("utf-8")
# Parse the string and get the list of events
events = json.loads(event_data)
# We allow multiple events in a file but there can be an instance where the file only has a single
# JSON event and not a list. Handling that condition too
if not isinstance(events, list):
events = [events]
for event in events:
try:
self._send_telemetry_events_handler.enqueue_event(
self._parse_telemetry_event(handler_name, event, event_file_time)
)
captured_events_count += 1
except InvalidExtensionEventError as invalid_error:
# These are the errors thrown if there's an error parsing the event. We want to report these back to the
# extension publishers so that they are aware of the issues.
# The error messages are all static messages, we will use this to create a dict and emit an event at the
# end of each run to notify if there were any errors parsing events for the extension
dropped_events_with_error_count[ustr(invalid_error)] += 1
except ServiceStoppedError as stopped_error:
logger.error(
"Unable to enqueue events as service stopped: {0}. Stopping collecting extension events".format(
ustr(stopped_error)))
raise
except Exception as error:
logger.warn("Unable to parse and transmit event, error: {0}".format(error))
if captured_events_count >= self._MAX_NUMBER_OF_EVENTS_PER_EXTENSION_PER_PERIOD:
break
return captured_events_count
def _parse_telemetry_event(self, handler_name, extension_unparsed_event, event_file_time):
"""
Parse the Json event file and convert it to TelemetryEvent object with the required data.
:return: Complete TelemetryEvent with all required fields filled up properly. Raises if event breaches contract.
"""
extension_event = self._parse_event_and_ensure_it_is_valid(extension_unparsed_event)
# Create a telemetry event, add all common parameters to the event
# and then overwrite all the common params with extension events params if same
event = TelemetryEvent(TELEMETRY_LOG_EVENT_ID, TELEMETRY_LOG_PROVIDER_ID)
event.file_type = "json"
CollectTelemetryEventsHandler.add_common_params_to_telemetry_event(event, event_file_time)
replace_or_add_params = {
GuestAgentGenericLogsSchema.EventName: "{0}-{1}".format(handler_name, extension_event[
ExtensionEventSchema.Version.lower()]),
GuestAgentGenericLogsSchema.CapabilityUsed: extension_event[ExtensionEventSchema.EventLevel.lower()],
GuestAgentGenericLogsSchema.TaskName: extension_event[ExtensionEventSchema.TaskName.lower()],
GuestAgentGenericLogsSchema.Context1: extension_event[ExtensionEventSchema.Message.lower()],
GuestAgentGenericLogsSchema.Context2: extension_event[ExtensionEventSchema.Timestamp.lower()],
GuestAgentGenericLogsSchema.Context3: extension_event[ExtensionEventSchema.OperationId.lower()],
GuestAgentGenericLogsSchema.EventPid: extension_event[ExtensionEventSchema.EventPid.lower()],
GuestAgentGenericLogsSchema.EventTid: extension_event[ExtensionEventSchema.EventTid.lower()]
}
self._replace_or_add_param_in_event(event, replace_or_add_params)
return event
def _parse_event_and_ensure_it_is_valid(self, extension_event):
"""
Parse the Json event from file. Raise InvalidExtensionEventError if the event breaches pre-set contract.
:param extension_event: The json event from file
:return: Verified Json event that qualifies the contract.
"""
def _clean_value(k, v):
if v is not None:
if isinstance(v, int):
if k.lower() in [ExtensionEventSchema.EventPid.lower(), ExtensionEventSchema.EventTid.lower()]:
return str(v)
return v.strip()
return v
event_size = 0
key_err_msg = "{0}: {1} not found"
# Convert the dict to all lower keys to avoid schema confusion.
# Only pick the params that we care about and skip the rest.
event = dict((k.lower(), _clean_value(k, v)) for k, v in extension_event.items() if
k.lower() in self._EXTENSION_EVENT_REQUIRED_FIELDS)
# Trim message and only pick the first 3k chars
message_key = ExtensionEventSchema.Message.lower()
if message_key in event:
event[message_key] = event[message_key][:self._EXTENSION_EVENT_MAX_MSG_LEN]
else:
raise InvalidExtensionEventError(
key_err_msg.format(InvalidExtensionEventError.MissingKeyError, ExtensionEventSchema.Message))
if not event[message_key]:
raise InvalidExtensionEventError(
"{0}: {1} should not be empty".format(InvalidExtensionEventError.EmptyMessageError,
ExtensionEventSchema.Message))
for required_key in self._EXTENSION_EVENT_REQUIRED_FIELDS:
# If any required key is missing from the event, then raise
if required_key not in event:
raise InvalidExtensionEventError(
key_err_msg.format(InvalidExtensionEventError.MissingKeyError, required_key))
# If the event_size > _EXTENSION_EVENT_MAX_SIZE=6k, then raise
if event_size > self._EXTENSION_EVENT_MAX_SIZE:
raise InvalidExtensionEventError(
"{0}: max event size allowed: {1}".format(InvalidExtensionEventError.OversizeEventError,
self._EXTENSION_EVENT_MAX_SIZE))
event_size += len(event[required_key])
return event
@staticmethod
def _replace_or_add_param_in_event(event, replace_or_add_params):
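# Example of the semantics (illustrative values): with event.parameters == [("EventName", "old")] and
# replace_or_add_params == {"EventName": "new", "TaskName": "t"}, the loop below rewrites "EventName"
# to "new" in place and then appends a new ("TaskName", "t") parameter.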
for param in event.parameters:
if param.name in replace_or_add_params:
param.value = replace_or_add_params.pop(param.name)
if not replace_or_add_params:
# All values replaced, return
return
# Add the remaining params to the event
for param_name in replace_or_add_params:
event.parameters.append(TelemetryEventParam(param_name, replace_or_add_params[param_name]))
class _CollectAndEnqueueEvents(PeriodicOperation):
"""
Periodic operation to collect telemetry events located in the events folder and enqueue them for the
SendTelemetryHandler thread.
"""
_EVENT_COLLECTION_PERIOD = datetime.timedelta(minutes=1)
def __init__(self, send_telemetry_events_handler):
super(_CollectAndEnqueueEvents, self).__init__(_CollectAndEnqueueEvents._EVENT_COLLECTION_PERIOD)
self._send_telemetry_events_handler = send_telemetry_events_handler
def _operation(self):
"""
Periodically send any events located in the events folder
"""
try:
if self._send_telemetry_events_handler.stopped():
logger.warn("{0} service is not running, skipping iteration.".format(
self._send_telemetry_events_handler.get_thread_name()))
return
self.process_events()
except Exception as error:
err_msg = "Failure in collecting telemetry events: {0}".format(ustr(error))
add_event(op=WALAEventOperation.UnhandledError, message=err_msg, is_success=False)
def process_events(self):
"""
Returns a list of events that need to be sent to the telemetry pipeline and deletes the corresponding files
from the events directory.
"""
event_directory_full_path = os.path.join(conf.get_lib_dir(), EVENTS_DIRECTORY)
event_files = os.listdir(event_directory_full_path)
debug_info = CollectOrReportEventDebugInfo(operation=CollectOrReportEventDebugInfo.OP_COLLECT)
for event_file in event_files:
try:
match = EVENT_FILE_REGEX.search(event_file)
if match is None:
continue
event_file_path = os.path.join(event_directory_full_path, event_file)
try:
logger.verbose("Processing event file: {0}", event_file_path)
with open(event_file_path, "rb") as event_fd:
event_data = event_fd.read().decode("utf-8")
event = parse_event(event_data)
# "legacy" events are events produced by previous versions of the agent (<= 2.2.46) and extensions;
# they do not include all the telemetry fields, so we add them here
is_legacy_event = match.group('agent_event') is None
if is_legacy_event:
# We'll use the file modification time as the event's timestamp (a proxy for its creation time)
event_file_creation_time_epoch = os.path.getmtime(event_file_path)
event_file_creation_time = datetime.datetime.fromtimestamp(event_file_creation_time_epoch)
if event.is_extension_event():
_CollectAndEnqueueEvents._trim_legacy_extension_event_parameters(event)
CollectTelemetryEventsHandler.add_common_params_to_telemetry_event(event,
event_file_creation_time)
else:
_CollectAndEnqueueEvents._update_legacy_agent_event(event,
event_file_creation_time)
self._send_telemetry_events_handler.enqueue_event(event)
finally:
# Todo: We should delete files after ensuring that we sent the data to Wireserver successfully
# from our end rather than deleting first and sending later. This is to ensure the data reliability
# of the agent telemetry pipeline.
os.remove(event_file_path)
except ServiceStoppedError as stopped_error:
logger.error(
"Unable to enqueue events as service stopped: {0}, skipping events collection".format(
ustr(stopped_error)))
except UnicodeError as uni_err:
debug_info.update_unicode_error(uni_err)
except Exception as error:
debug_info.update_op_error(error)
debug_info.report_debug_info()
@staticmethod
def _update_legacy_agent_event(event, event_creation_time):
# Ensure that if an agent event is missing a field from the schema defined since 2.2.47, the missing fields
# will be appended, ensuring the event schema is complete before the event is reported.
new_event = TelemetryEvent()
new_event.parameters = []
CollectTelemetryEventsHandler.add_common_params_to_telemetry_event(new_event, event_creation_time)
event_params = dict([(param.name, param.value) for param in event.parameters])
new_event_params = dict([(param.name, param.value) for param in new_event.parameters])
missing_params = set(new_event_params.keys()).difference(set(event_params.keys()))
params_to_add = []
for param_name in missing_params:
params_to_add.append(TelemetryEventParam(param_name, new_event_params[param_name]))
event.parameters.extend(params_to_add)
@staticmethod
def _trim_legacy_extension_event_parameters(event):
"""
This method is called for extension events before they are sent out. Per the agreement with extension
publishers, the parameters that belong to extensions and will be reported intact are Name, Version, Operation,
OperationSuccess, Message, and Duration. Since there is nothing preventing extensions from populating other
fields (which belong to the agent), we call this method to ensure the rest of the parameters are trimmed since
they will be replaced with values coming from the agent.
:param event: Extension event to trim.
:return: Trimmed extension event; containing only extension-specific parameters.
"""
params_to_keep = dict().fromkeys([
GuestAgentExtensionEventsSchema.Name,
GuestAgentExtensionEventsSchema.Version,
GuestAgentExtensionEventsSchema.Operation,
GuestAgentExtensionEventsSchema.OperationSuccess,
GuestAgentExtensionEventsSchema.Message,
GuestAgentExtensionEventsSchema.Duration
])
trimmed_params = []
for param in event.parameters:
if param.name in params_to_keep:
trimmed_params.append(param)
event.parameters = trimmed_params
class CollectTelemetryEventsHandler(ThreadHandlerInterface):
"""
This handler takes care of fetching the extension telemetry events from the {extension_events_dir} and sending them to
Kusto for advanced debuggability.
"""
_THREAD_NAME = "TelemetryEventsCollector"
def __init__(self, send_telemetry_events_handler):
self.should_run = True
self.thread = None
self._send_telemetry_events_handler = send_telemetry_events_handler
@staticmethod
def get_thread_name():
return CollectTelemetryEventsHandler._THREAD_NAME
def run(self):
logger.info("Start Extension Telemetry service.")
self.start()
def is_alive(self):
return self.thread is not None and self.thread.is_alive()
def start(self):
self.thread = threading.Thread(target=self.daemon)
self.thread.setDaemon(True)
self.thread.setName(CollectTelemetryEventsHandler.get_thread_name())
self.thread.start()
def stop(self):
"""
Stop server communication and join the thread to the main thread.
"""
self.should_run = False
if self.is_alive():
self.thread.join()
def stopped(self):
return not self.should_run
def daemon(self):
periodic_operations = [
_CollectAndEnqueueEvents(self._send_telemetry_events_handler)
]
is_etp_enabled = get_supported_feature_by_name(SupportedFeatureNames.ExtensionTelemetryPipeline).is_supported
logger.info("Extension Telemetry pipeline enabled: {0}".format(is_etp_enabled))
if is_etp_enabled:
periodic_operations.append(_ProcessExtensionEvents(self._send_telemetry_events_handler))
logger.info("Successfully started the {0} thread".format(self.get_thread_name()))
while not self.stopped():
try:
for periodic_op in periodic_operations:
periodic_op.run()
except Exception as error:
logger.warn(
"An error occurred in the Telemetry Extension thread main loop; will skip the current iteration.\n{0}",
ustr(error))
finally:
PeriodicOperation.sleep_until_next_operation(periodic_operations)
@staticmethod
def add_common_params_to_telemetry_event(event, event_time):
reporter = get_event_logger()
reporter.add_common_event_parameters(event, event_time) Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/env.py 0000664 0000000 0000000 00000026663 14626177470 0022523 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import datetime
import re
import os
import socket
import threading
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.dhcp import get_dhcp_handler
from azurelinuxagent.common.event import add_periodic, WALAEventOperation, add_event
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.interfaces import ThreadHandlerInterface
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
from azurelinuxagent.ga.periodic_operation import PeriodicOperation
CACHE_PATTERNS = [
re.compile("^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), # pylint: disable=W1401
re.compile("^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), # pylint: disable=W1401
re.compile("^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) # pylint: disable=W1401
]
MAXIMUM_CACHED_FILES = 50
def get_env_handler():
return EnvHandler()
class RemovePersistentNetworkRules(PeriodicOperation):
def __init__(self, osutil):
super(RemovePersistentNetworkRules, self).__init__(conf.get_remove_persistent_net_rules_period())
self.osutil = osutil
def _operation(self):
self.osutil.remove_rules_files()
class MonitorDhcpClientRestart(PeriodicOperation):
def __init__(self, osutil):
super(MonitorDhcpClientRestart, self).__init__(conf.get_monitor_dhcp_client_restart_period())
self.osutil = osutil
self.dhcp_handler = get_dhcp_handler()
self.dhcp_handler.conf_routes()
self.dhcp_warning_enabled = True
self.dhcp_id_list = []
def _operation(self):
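# First iteration: just take a snapshot of the DHCP client PID(s). On subsequent iterations, if any of
# the recorded PIDs is no longer alive, the client was restarted: restore the routes and refresh the
# snapshot.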
if len(self.dhcp_id_list) == 0:
self.dhcp_id_list = self._get_dhcp_client_pid()
return
if all(self.osutil.check_pid_alive(pid) for pid in self.dhcp_id_list):
return
new_pid = self._get_dhcp_client_pid()
if len(new_pid) != 0 and new_pid != self.dhcp_id_list:
logger.info("EnvMonitor: Detected dhcp client restart. Restoring routing table.")
self.dhcp_handler.conf_routes()
self.dhcp_id_list = new_pid
def _get_dhcp_client_pid(self):
pid = []
try:
# return a sorted list since handle_dhclient_restart needs to compare the previous value with
# the new value and the comparison should not be affected by the order of the items in the list
pid = sorted(self.osutil.get_dhcp_pid())
if len(pid) == 0 and self.dhcp_warning_enabled:
logger.warn("Dhcp client is not running.")
except Exception as exception:
if self.dhcp_warning_enabled:
logger.error("Failed to get the PID of the DHCP client: {0}", ustr(exception))
self.dhcp_warning_enabled = len(pid) != 0
return pid
class EnableFirewall(PeriodicOperation):
def __init__(self, osutil, protocol):
super(EnableFirewall, self).__init__(conf.get_enable_firewall_period())
self._osutil = osutil
self._protocol = protocol
self._try_remove_legacy_firewall_rule = False
self._is_first_setup = True
self._reset_count = 0
self._report_after = datetime.datetime.min
self._report_period = None # None indicates "report immediately"
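# _reset_count tracks how many times the missing rules were re-created since the last report;
# _report_after/_report_period implement the throttled reporting described in _operation() below.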
def _operation(self):
# If the rules ever change we must reset all rules and start over again.
#
# There was a rule change at 2.2.26, which started dropping non-root traffic
# to WireServer. The previous rules allowed traffic. Having both rules in
# place negated the fix in 2.2.26. Removing only the legacy rule and keeping other rules intact.
#
# We only try to remove the legacy firewall rule once on service start (irrespective of its exit code).
if not self._try_remove_legacy_firewall_rule:
self._osutil.remove_legacy_firewall_rule(dst_ip=self._protocol.get_endpoint())
self._try_remove_legacy_firewall_rule = True
success, missing_firewall_rules = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid())
if len(missing_firewall_rules) > 0:
if self._is_first_setup:
msg = "Created firewall rules for the Azure Fabric:\n{0}".format(self._get_firewall_state())
logger.info(msg)
add_event(op=WALAEventOperation.Firewall, message=msg)
else:
self._reset_count += 1
# We report immediately (when period is None) the first 5 instances, then we switch the period to every few hours
if self._report_period is None:
msg = "Some firewall rules were missing: {0}. Re-created all the rules:\n{1}".format(missing_firewall_rules, self._get_firewall_state())
if self._reset_count >= 5:
self._report_period = datetime.timedelta(hours=3)
self._reset_count = 0
self._report_after = datetime.datetime.now() + self._report_period
elif datetime.datetime.now() >= self._report_after:
msg = "Some firewall rules were missing: {0}. This has happened {1} time(s) since the last report. Re-created all the rules:\n{2}".format(
missing_firewall_rules, self._reset_count, self._get_firewall_state())
self._reset_count = 0
self._report_after = datetime.datetime.now() + self._report_period
else:
msg = ""
if msg != "":
logger.info(msg)
add_event(op=WALAEventOperation.ResetFirewall, message=msg)
add_periodic(
logger.EVERY_HOUR,
AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.Firewall,
is_success=success,
log_event=False)
self._is_first_setup = False
def _get_firewall_state(self):
try:
return self._osutil.get_firewall_list()
except Exception as e:
return "Failed to get the firewall state: {0}".format(ustr(e))
class LogFirewallRules(PeriodicOperation):
"""
Logs the state of the firewall rules once a day.
The goal is to capture the firewall state when the agent service starts up, and to provide additional
debug data that is useful long term.
"""
def __init__(self, osutil):
super(LogFirewallRules, self).__init__(conf.get_firewall_rules_log_period())
self._osutil = osutil
def _operation(self):
# Log firewall rules state once a day
logger.info("Current Firewall rules:\n{0}".format(self._osutil.get_firewall_list()))
class SetRootDeviceScsiTimeout(PeriodicOperation):
def __init__(self, osutil):
super(SetRootDeviceScsiTimeout, self).__init__(conf.get_root_device_scsi_timeout_period())
self._osutil = osutil
def _operation(self):
self._osutil.set_scsi_disks_timeout(conf.get_root_device_scsi_timeout())
class MonitorHostNameChanges(PeriodicOperation):
def __init__(self, osutil):
super(MonitorHostNameChanges, self).__init__(conf.get_monitor_hostname_period())
self._osutil = osutil
self._hostname = self._osutil.get_hostname_record()
def _operation(self):
curr_hostname = socket.gethostname()
if curr_hostname != self._hostname:
logger.info("EnvMonitor: Detected hostname change: {0} -> {1}",
self._hostname,
curr_hostname)
self._osutil.set_hostname(curr_hostname)
try:
self._osutil.publish_hostname(curr_hostname, recover_nic=True)
except Exception as e:
msg = "Error while publishing the hostname: {0}".format(e)
add_event(AGENT_NAME, op=WALAEventOperation.HostnamePublishing, is_success=False, message=msg, log_event=False)
self._hostname = curr_hostname
class EnvHandler(ThreadHandlerInterface):
"""
Monitor changes to DHCP and hostname.
If a DHCP client process restart has occurred, reset the routes and re-configure DHCP with the fabric.
Monitor SCSI disks.
If a new SCSI disk is found, set its timeout.
"""
_THREAD_NAME = "EnvHandler"
@staticmethod
def get_thread_name():
return EnvHandler._THREAD_NAME
def __init__(self):
self.stopped = True
self.hostname = None
self.env_thread = None
def run(self):
if not self.stopped:
logger.info("Stop existing env monitor service.")
self.stop()
self.stopped = False
logger.info("Starting env monitor service.")
self.start()
def is_alive(self):
return self.env_thread.is_alive()
def start(self):
self.env_thread = threading.Thread(target=self.daemon)
self.env_thread.setDaemon(True)
self.env_thread.setName(self.get_thread_name())
self.env_thread.start()
def daemon(self):
try:
# The initialization of the protocol needs to be done within the environment thread itself rather
# than initializing it in the ExtHandler thread. This is done to avoid any concurrency issues as each
# thread would now have its own ProtocolUtil object as per the SingletonPerThread model.
protocol_util = get_protocol_util()
protocol = protocol_util.get_protocol()
osutil = get_osutil()
periodic_operations = [
RemovePersistentNetworkRules(osutil),
MonitorDhcpClientRestart(osutil),
]
if conf.enable_firewall():
periodic_operations.append(EnableFirewall(osutil, protocol))
periodic_operations.append(LogFirewallRules(osutil))
if conf.get_root_device_scsi_timeout() is not None:
periodic_operations.append(SetRootDeviceScsiTimeout(osutil))
if conf.get_monitor_hostname():
periodic_operations.append(MonitorHostNameChanges(osutil))
while not self.stopped:
try:
for op in periodic_operations:
op.run()
except Exception as e:
logger.error("An error occurred in the environment thread main loop; will skip the current iteration.\n{0}", ustr(e))
finally:
PeriodicOperation.sleep_until_next_operation(periodic_operations)
except Exception as e:
logger.error("An error occurred in the environment thread; will exit the thread.\n{0}", ustr(e))
def stop(self):
"""
Stop server communication and join the thread to the main thread.
"""
self.stopped = True
if self.env_thread is not None:
self.env_thread.join()
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/extensionprocessutil.py 0000664 0000000 0000000 00000022144 14626177470 0026232 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
#
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import os
import re
import signal
import time
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.common.exception import ExtensionErrorCodes, ExtensionOperationError, ExtensionError
from azurelinuxagent.common.future import ustr
TELEMETRY_MESSAGE_MAX_LEN = 3200
def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
"""
Utility function that waits for the process to complete within the given time frame. This function will
terminate the process when the given time frame elapses.
:param process: Reference to a running process
:param timeout: Number of seconds to wait for the process to complete before killing it
:param cpu_cgroup: (Optional) cgroup from which the CPU throttled time is read if the process times out
:return: Three values: whether the process timed out, its return code (None if it timed out), and the CPU
throttled time (0 unless the process timed out)
"""
while timeout > 0 and process.poll() is None:
time.sleep(1)
timeout -= 1
return_code = None
throttled_time = 0
if timeout == 0:
throttled_time = get_cpu_throttled_time(cpu_cgroup)
os.killpg(os.getpgid(process.pid), signal.SIGKILL)
else:
# process completed or forked; sleep 1 sec to give the child process (if any) a chance to start
time.sleep(1)
return_code = process.wait()
return timeout == 0, return_code, throttled_time
def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_cgroup=None):
"""
Utility function that waits for process completion and retrieves its output (stdout and stderr) if it completed
before the timeout period. Otherwise, the process will get killed and an ExtensionError will be raised.
In case the return code is non-zero, ExtensionError will be raised.
:param process: Reference to a running process
:param command: The extension command to run
:param timeout: Number of seconds to wait before killing the process
:param stdout: Must be a file since we seek on it when parsing the subprocess output
:param stderr: Must be a file since we seek on it when parsing the subprocess outputs
:param error_code: The error code to set if we raise an ExtensionError
:param cpu_cgroup: Reference the cpu cgroup name and path
:return:
"""
# Wait for process completion or timeout
timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup)
process_output = read_output(stdout, stderr)
if timed_out:
if cpu_cgroup is not None: # Report CPUThrottledTime when timeout happens
raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): {2}\n{3}".format(timeout, throttled_time, command, process_output),
code=ExtensionErrorCodes.PluginHandlerScriptTimedout)
raise ExtensionError("Timeout({0}): {1}\n{2}".format(timeout, command, process_output),
code=ExtensionErrorCodes.PluginHandlerScriptTimedout)
if return_code != 0:
noexec_warning = ""
if return_code == 126: # Permission denied
noexec_path = _check_noexec()
if noexec_path is not None:
noexec_warning = "\nWARNING: {0} is mounted with the noexec flag, which can prevent execution of VM Extensions.".format(noexec_path)
raise ExtensionOperationError(
"Non-zero exit code: {0}, {1}{2}\n{3}".format(return_code, command, noexec_warning, process_output),
code=error_code,
exit_code=return_code)
return process_output
#
# Collect a sample of errors while checking for the noexec flag. Consider removing this telemetry after a few releases.
#
_COLLECT_NOEXEC_ERRORS = True
def _check_noexec():
"""
Check if /var is mounted with the noexec flag.
"""
# W0603: Using the global statement (global-statement)
# OK to disable; _COLLECT_NOEXEC_ERRORS is used only within _check_noexec, but needs to persist across calls.
global _COLLECT_NOEXEC_ERRORS # pylint: disable=W0603
try:
agent_dir = conf.get_lib_dir()
with open('/proc/mounts', 'r') as f:
while True:
line = f.readline()
if line == "": # EOF
break
# The mount point is on the second column, and the flags are on the fourth. e.g.
#
# # grep /var /proc/mounts
# /dev/mapper/rootvg-varlv /var xfs rw,seclabel,noexec,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota 0 0
#
columns = line.split()
mount_point = columns[1]
flags = columns[3]
if agent_dir.startswith(mount_point) and "noexec" in flags:
message = "The noexec flag is set on {0}. This can prevent extensions from executing.".format(mount_point)
logger.warn(message)
add_event(op=WALAEventOperation.NoExec, is_success=False, message=message)
return mount_point
except Exception as e:
message = "Error while checking the noexec flag: {0}".format(e)
logger.warn(message)
if _COLLECT_NOEXEC_ERRORS:
_COLLECT_NOEXEC_ERRORS = False
add_event(op=WALAEventOperation.NoExec, is_success=False, log_event=False, message="Error while checking the noexec flag: {0}".format(e))
return None
SAS_TOKEN_RE = re.compile(r'(https://\S+\?)((sv|st|se|sr|sp|sip|spr|sig)=\S+)+', flags=re.IGNORECASE)
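# Example of the redaction performed by read_output() below (illustrative URL):
#   "https://acct.blob.core.windows.net/c/f?sv=2019-12-12&sig=abc123" -> "https://acct.blob.core.windows.net/c/f?"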
def read_output(stdout, stderr):
"""
Read the output of the process sent to stdout and stderr and trim them to the max appropriate length.
:param stdout: File containing the stdout of the process
:param stderr: File containing the stderr of the process
:return: Returns the formatted concatenated stdout and stderr of the process
"""
try:
stdout.seek(0)
stderr.seek(0)
stdout = ustr(stdout.read(TELEMETRY_MESSAGE_MAX_LEN), encoding='utf-8',
errors='backslashreplace')
stderr = ustr(stderr.read(TELEMETRY_MESSAGE_MAX_LEN), encoding='utf-8',
errors='backslashreplace')
def redact(s):
# redact query strings that look like SAS tokens
return SAS_TOKEN_RE.sub(r'\1', s)
return format_stdout_stderr(redact(stdout), redact(stderr))
except Exception as e:
return format_stdout_stderr("", "Cannot read stdout/stderr: {0}".format(ustr(e)))
def format_stdout_stderr(stdout, stderr):
"""
Format stdout and stderr's output to make it suitable in telemetry.
The goal is to maximize the amount of output given the constraints
of telemetry.
For example, if there is more stderr output than stdout output, give
more buffer space to stderr.
:param str stdout: characters captured from stdout
:param str stderr: characters captured from stderr
:return: a string formatted with stdout and stderr that is less than
or equal to TELEMETRY_MESSAGE_MAX_LEN.
:rtype: str
"""
template = "[stdout]\n{0}\n\n[stderr]\n{1}"
# +6 == len("{0}") + len("{1}")
max_len = TELEMETRY_MESSAGE_MAX_LEN
max_len_each = int((max_len - len(template) + 6) / 2)
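# Worked example: with TELEMETRY_MESSAGE_MAX_LEN == 3200 and len(template) == 26, max_len_each is
# int((3200 - 26 + 6) / 2) == 1590 characters per stream, before any unused budget is shared below.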
if max_len_each <= 0:
return ''
def to_s(captured_stdout, stdout_offset, captured_stderr, stderr_offset):
s = template.format(captured_stdout[stdout_offset:], captured_stderr[stderr_offset:])
return s
if len(stdout) + len(stderr) < max_len:
return to_s(stdout, 0, stderr, 0)
elif len(stdout) < max_len_each:
bonus = max_len_each - len(stdout)
stderr_len = min(max_len_each + bonus, len(stderr))
return to_s(stdout, 0, stderr, -1*stderr_len)
elif len(stderr) < max_len_each:
bonus = max_len_each - len(stderr)
stdout_len = min(max_len_each + bonus, len(stdout))
return to_s(stdout, -1*stdout_len, stderr, 0)
else:
return to_s(stdout, -1*max_len_each, stderr, -1*max_len_each)
def get_cpu_throttled_time(cpu_cgroup):
"""
Return the CPU throttled time for the given cgroup.
"""
throttled_time = 0
if cpu_cgroup is not None:
try:
throttled_time = cpu_cgroup.get_cpu_throttled_time(read_previous_throttled_time=False)
except Exception as e:
logger.warn("Failed to get cpu throttled time for the extension: {0}", ustr(e))
return throttled_time
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/exthandlers.py 0000664 0000000 0000000 00000344535 14626177470 0024255 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import copy
import datetime
import glob
import json
import os
import re
import shutil
import stat
import tempfile
import time
import zipfile
from distutils.version import LooseVersion
from collections import defaultdict
from functools import partial
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.common import version
from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \
SupportedFeatureNames, get_supported_feature_by_name, get_agent_supported_features_list_for_crp
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
from azurelinuxagent.common.datacontract import get_properties, set_properties
from azurelinuxagent.common.errorstate import ErrorState
from azurelinuxagent.common.event import add_event, elapsed_milliseconds, WALAEventOperation, \
add_periodic, EVENTS_DIRECTORY
from azurelinuxagent.common.exception import ExtensionDownloadError, ExtensionError, ExtensionErrorCodes, \
ExtensionOperationError, ExtensionUpdateError, ProtocolError, ProtocolNotFoundError, ExtensionsGoalStateError, \
GoalStateAggregateStatusCodes, MultiConfigExtensionEnableError
from azurelinuxagent.common.future import ustr, is_file_not_found_error
from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource
from azurelinuxagent.common.protocol.restapi import ExtensionStatus, ExtensionSubStatus, Extension, ExtHandlerStatus, \
VMStatus, GoalStateAggregateStatus, ExtensionState, ExtensionRequestedState, ExtensionSettings
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
_HANDLER_NAME_PATTERN = r'^([^-]+)'
_HANDLER_VERSION_PATTERN = r'(\d+(?:\.\d+)*)'
_HANDLER_PATTERN = _HANDLER_NAME_PATTERN + r"-" + _HANDLER_VERSION_PATTERN
_HANDLER_PKG_PATTERN = re.compile(_HANDLER_PATTERN + r'\.zip$', re.IGNORECASE)
_DEFAULT_EXT_TIMEOUT_MINUTES = 90
_VALID_HANDLER_STATUS = ['Ready', 'NotReady', "Installing", "Unresponsive"]
HANDLER_NAME_PATTERN = re.compile(_HANDLER_NAME_PATTERN, re.IGNORECASE)
HANDLER_COMPLETE_NAME_PATTERN = re.compile(_HANDLER_PATTERN + r'$', re.IGNORECASE)
HANDLER_PKG_EXT = ".zip"
# This is the default value for the env variables, whenever we call a command which is not an update scenario, we
# set the env variable value to NOT_RUN to reduce ambiguity for the extension publishers
NOT_RUN = "NOT_RUN"
# Max size of individual status file
_MAX_STATUS_FILE_SIZE_IN_BYTES = 128 * 1024 # 128K
# Truncating length of fields.
_MAX_STATUS_MESSAGE_LENGTH = 1024 # 1k message allowed to be shown in the portal.
_MAX_SUBSTATUS_FIELD_LENGTH = 10 * 1024 # 10 KB; allows fields to carry enough debugging information.
_TRUNCATED_SUFFIX = u" ... [TRUNCATED]"
# Status file specific retries and delays.
_NUM_OF_STATUS_FILE_RETRIES = 5
_STATUS_FILE_RETRY_DELAY = 2 # seconds
# This is the default sequence number we use when there are no settings available for Handlers
_DEFAULT_SEQ_NO = "0"
class ExtHandlerStatusValue(object):
"""
Statuses for Extension Handlers
"""
ready = "Ready"
not_ready = "NotReady"
class ExtensionStatusValue(object):
"""
Statuses for Extensions
"""
transitioning = "transitioning"
warning = "warning"
error = "error"
success = "success"
STRINGS = ['transitioning', 'warning', 'error', 'success']
_EXTENSION_TERMINAL_STATUSES = [ExtensionStatusValue.error, ExtensionStatusValue.success]
class ExtCommandEnvVariable(object):
Prefix = "AZURE_GUEST_AGENT"
DisableReturnCode = "{0}_DISABLE_CMD_EXIT_CODE".format(Prefix)
DisableReturnCodeMultipleExtensions = "{0}_DISABLE_CMD_EXIT_CODES_MULTIPLE_EXTENSIONS".format(Prefix)
UninstallReturnCode = "{0}_UNINSTALL_CMD_EXIT_CODE".format(Prefix)
ExtensionPath = "{0}_EXTENSION_PATH".format(Prefix)
ExtensionVersion = "{0}_EXTENSION_VERSION".format(Prefix)
ExtensionSeqNumber = "ConfigSequenceNumber" # At par with Windows Guest Agent
ExtensionName = "ConfigExtensionName"
UpdatingFromVersion = "{0}_UPDATING_FROM_VERSION".format(Prefix)
WireProtocolAddress = "{0}_WIRE_PROTOCOL_ADDRESS".format(Prefix)
ExtensionSupportedFeatures = "{0}_EXTENSION_SUPPORTED_FEATURES".format(Prefix)
def validate_has_key(obj, key, full_key_path):
if key not in obj:
raise ExtensionStatusError(msg="Invalid status format by extension: Missing {0} key".format(full_key_path),
code=ExtensionStatusError.StatusFileMalformed)
def validate_in_range(val, valid_range, name):
if val not in valid_range:
raise ExtensionStatusError(msg="Invalid value {0} in range {1} at the node {2}".format(val, valid_range, name),
code=ExtensionStatusError.StatusFileMalformed)
def parse_formatted_message(formatted_message):
if formatted_message is None:
return None
validate_has_key(formatted_message, 'lang', 'formattedMessage/lang')
validate_has_key(formatted_message, 'message', 'formattedMessage/message')
return formatted_message.get('message')
def parse_ext_substatus(substatus):
# Check extension sub status format
validate_has_key(substatus, 'status', 'substatus/status')
validate_in_range(substatus['status'], ExtensionStatusValue.STRINGS, 'substatus/status')
status = ExtensionSubStatus()
status.name = substatus.get('name')
status.status = substatus.get('status')
status.code = substatus.get('code', 0)
formatted_message = substatus.get('formattedMessage')
status.message = parse_formatted_message(formatted_message)
return status
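# Illustrative shape of the status data parsed below (inferred from the validations in parse_ext_status):
#   [{"status": {"status": "success", "operation": "Enable", "code": 0,
#                "formattedMessage": {"lang": "en-US", "message": "..."},
#                "substatus": [...]}}]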
def parse_ext_status(ext_status, data):
if data is None:
return
if not isinstance(data, list):
data_string = ustr(data)[:4096]
raise ExtensionStatusError(msg="The extension status must be an array: {0}".format(data_string), code=ExtensionStatusError.StatusFileMalformed)
if not data:
return
# Currently, only the first status will be reported
data = data[0]
# Check extension status format
validate_has_key(data, 'status', 'status')
status_data = data['status']
validate_has_key(status_data, 'status', 'status/status')
status = status_data['status']
if status not in ExtensionStatusValue.STRINGS:
status = ExtensionStatusValue.error
applied_time = status_data.get('configurationAppliedTime')
ext_status.configurationAppliedTime = applied_time
ext_status.operation = status_data.get('operation')
ext_status.status = status
ext_status.code = status_data.get('code', 0)
formatted_message = status_data.get('formattedMessage')
ext_status.message = parse_formatted_message(formatted_message)
substatus_list = status_data.get('substatus', [])
# some extensions incorrectly report an empty substatus with a null value
if substatus_list is None:
substatus_list = []
for substatus in substatus_list:
if substatus is not None:
ext_status.substatusList.append(parse_ext_substatus(substatus))
def migrate_handler_state():
"""
Migrate handler state and status (if they exist) from an agent-owned directory into the
handler-owned config directory
Notes:
- The v2.0.x branch wrote all handler-related state into the handler-owned config
directory (e.g., /var/lib/waagent/Microsoft.Azure.Extensions.LinuxAsm-2.0.1/config).
- The v2.1.x branch originally moved that state into an agent-owned handler
state directory (e.g., /var/lib/waagent/handler_state).
- This move can cause v2.1.x agents to invoke a handler's install command multiple times. It also makes
clean-up more difficult since the agent must remove the state as well as the handler directory.
"""
handler_state_path = os.path.join(conf.get_lib_dir(), "handler_state")
if not os.path.isdir(handler_state_path):
return
for handler_path in glob.iglob(os.path.join(handler_state_path, "*")):
handler = os.path.basename(handler_path)
handler_config_path = os.path.join(conf.get_lib_dir(), handler, "config")
if os.path.isdir(handler_config_path):
for file in ("State", "Status"): # pylint: disable=redefined-builtin
from_path = os.path.join(handler_state_path, handler, file.lower())
to_path = os.path.join(handler_config_path, "Handler" + file)
if os.path.isfile(from_path) and not os.path.isfile(to_path):
try:
shutil.move(from_path, to_path)
except Exception as e:
logger.warn(
"Exception occurred migrating {0} {1} file: {2}",
handler,
file,
str(e))
try:
shutil.rmtree(handler_state_path)
except Exception as e:
logger.warn("Exception occurred removing {0}: {1}", handler_state_path, str(e))
return
class ExtHandlerState(object):
NotInstalled = "NotInstalled"
Installed = "Installed"
Enabled = "Enabled"
FailedUpgrade = "FailedUpgrade"
class GoalStateStatus(object):
"""
This is an Enum to define the State of the GoalState as a whole. This is reported as part of the
'vmArtifactsAggregateStatus.goalStateAggregateStatus' in the status blob.
Note: not to be confused with the State of the ExtHandler which reported as part of 'handlerAggregateStatus'
"""
Success = "Success"
Failed = "Failed"
# The following field is not used now but would be needed once Status reporting is moved to a separate thread.
Initialize = "Initialize"
Transitioning = "Transitioning"
def get_exthandlers_handler(protocol):
return ExtHandlersHandler(protocol)
def list_agent_lib_directory(skip_agent_package=True, ignore_names=None):
lib_dir = conf.get_lib_dir()
for name in os.listdir(lib_dir):
path = os.path.join(lib_dir, name)
if ignore_names is not None and any(ignore_names) and name in ignore_names:
continue
if skip_agent_package and (version.is_agent_package(path) or version.is_agent_path(path)):
continue
yield name, path
class ExtHandlersHandler(object):
def __init__(self, protocol):
self.protocol = protocol
self.ext_handlers = None
# The GoalState Aggregate status needs to report the last status of the GoalState. Since we only process
# extensions on goal state change, we need to maintain its state.
# Setting the status to None here. This would be overridden as soon as the first GoalState is processed
self.__gs_aggregate_status = None
self.report_status_error_state = ErrorState()
def __last_gs_unsupported(self):
# Return if the last GoalState was unsupported
return self.__gs_aggregate_status is not None and \
self.__gs_aggregate_status.status == GoalStateStatus.Failed and \
self.__gs_aggregate_status.code == GoalStateAggregateStatusCodes.GoalStateUnsupportedRequiredFeatures
def run(self):
try:
gs = self.protocol.get_goal_state()
egs = gs.extensions_goal_state
# self.ext_handlers needs to be initialized before returning, since status reporting depends on it; also
# we make a deep copy of the extensions, since changes are made to self.ext_handlers while processing the extensions
self.ext_handlers = copy.deepcopy(egs.extensions)
if self._extensions_on_hold():
return
utc_start = datetime.datetime.utcnow()
error = None
message = "ProcessExtensionsGoalState started [{0} channel: {1} source: {2} activity: {3} correlation {4} created: {5}]".format(
egs.id, egs.channel, egs.source, egs.activity_id, egs.correlation_id, egs.created_on_timestamp)
logger.info('')
logger.info(message)
add_event(op=WALAEventOperation.ExtensionProcessing, message=message)
try:
self.__process_and_handle_extensions(egs.svd_sequence_number, egs.id)
self._cleanup_outdated_handlers()
except Exception as e:
error = u"Error processing extensions:{0}".format(textutil.format_exception(e))
finally:
duration = elapsed_milliseconds(utc_start)
if error is None:
message = 'ProcessExtensionsGoalState completed [{0} {1} ms]\n'.format(egs.id, duration)
logger.info(message)
else:
message = 'ProcessExtensionsGoalState failed [{0} {1} ms]\n{2}'.format(egs.id, duration, error)
logger.error(message)
add_event(op=WALAEventOperation.ExtensionProcessing, is_success=(error is None), message=message, log_event=False, duration=duration)
except Exception as error:
msg = u"ProcessExtensionsInGoalState - Exception processing extension handlers:{0}".format(textutil.format_exception(error))
logger.error(msg)
add_event(op=WALAEventOperation.ExtensionProcessing, is_success=False, message=msg, log_event=False)
def __get_unsupported_features(self):
required_features = self.protocol.get_goal_state().extensions_goal_state.required_features
supported_features = get_agent_supported_features_list_for_crp()
return [feature for feature in required_features if feature not in supported_features]
def __process_and_handle_extensions(self, svd_sequence_number, goal_state_id):
try:
# Verify we satisfy all required features, if any. If not, report failure here and skip further processing.
unsupported_features = self.__get_unsupported_features()
if any(unsupported_features):
msg = "Failing GS {0} as Unsupported features found: {1}".format(goal_state_id, ', '.join(unsupported_features))
logger.warn(msg)
self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Failed, seq_no=svd_sequence_number,
code=GoalStateAggregateStatusCodes.GoalStateUnsupportedRequiredFeatures,
message=msg)
add_event(op=WALAEventOperation.GoalStateUnsupportedFeatures,
is_success=False,
message=msg,
log_event=False)
else:
self.handle_ext_handlers(goal_state_id)
self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Success, seq_no=svd_sequence_number,
code=GoalStateAggregateStatusCodes.Success,
message="GoalState executed successfully")
except Exception as error:
msg = "Unexpected error when processing goal state:{0}".format(textutil.format_exception(error))
self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Failed, seq_no=svd_sequence_number,
code=GoalStateAggregateStatusCodes.GoalStateUnknownFailure,
message=msg)
logger.warn(msg)
add_event(op=WALAEventOperation.ExtensionProcessing,
is_success=False,
message=msg,
log_event=False)
@staticmethod
def get_ext_handler_instance_from_path(name, path, protocol, skip_handlers=None):
if not os.path.isdir(path) or re.match(HANDLER_NAME_PATTERN, name) is None:
return None
separator = name.rfind('-')
handler_name = name[0:separator]
if skip_handlers is not None and handler_name in skip_handlers:
# Handler in skip_handlers list, not parsing it
return None
eh = Extension(name=handler_name)
eh.version = str(FlexibleVersion(name[separator + 1:]))
return ExtHandlerInstance(eh, protocol)
def _cleanup_outdated_handlers(self):
# Skip cleanup if the previous GS was Unsupported
if self.__last_gs_unsupported():
return
handlers = []
pkgs = []
ext_handlers_in_gs = [ext_handler.name for ext_handler in self.ext_handlers]
# Build a collection of uninstalled handlers and orphaned packages
# Note:
# -- An orphaned package is one without a corresponding handler
# directory
for item, path in list_agent_lib_directory(skip_agent_package=True):
try:
handler_instance = ExtHandlersHandler.get_ext_handler_instance_from_path(name=item,
path=path,
protocol=self.protocol,
skip_handlers=ext_handlers_in_gs)
if handler_instance is not None:
# Since this handler name doesn't exist in the GS, marking it for deletion
handlers.append(handler_instance)
continue
except Exception:
continue
if os.path.isfile(path) and \
not os.path.isdir(path[0:-len(HANDLER_PKG_EXT)]):
if not re.match(_HANDLER_PKG_PATTERN, item):
continue
pkgs.append(path)
# Then, remove the orphaned packages
for pkg in pkgs:
try:
os.remove(pkg)
logger.verbose("Removed orphaned extension package {0}".format(pkg))
except OSError as e:
logger.warn("Failed to remove orphaned package {0}: {1}".format(pkg, e.strerror))
# Finally, remove the directories and packages of the orphaned handlers, i.e. any extension directory that
# is still on the filesystem but not in the GoalState
for handler in handlers:
handler.remove_ext_handler()
pkg = os.path.join(conf.get_lib_dir(), handler.get_full_name() + HANDLER_PKG_EXT)
if os.path.isfile(pkg):
try:
os.remove(pkg)
logger.verbose("Removed extension package {0}".format(pkg))
except OSError as e:
logger.warn("Failed to remove extension package {0}: {1}".format(pkg, e.strerror))
def _extensions_on_hold(self):
if conf.get_enable_overprovisioning():
if self.protocol.get_goal_state().extensions_goal_state.on_hold:
msg = "Extension handling is on hold"
logger.info(msg)
add_event(op=WALAEventOperation.ExtensionProcessing, message=msg)
return True
return False
@staticmethod
def __get_dependency_level(tup):
(extension, handler) = tup
if extension is not None:
return extension.dependency_level_sort_key(handler.state)
return handler.dependency_level_sort_key()
def __get_sorted_extensions_for_processing(self):
all_extensions = []
for handler in self.ext_handlers:
if any(handler.settings):
all_extensions.extend([(ext, handler) for ext in handler.settings])
else:
# We need to process the Handler even if no settings specified from CRP (legacy behavior)
logger.info("No extension/run-time settings settings found for {0}".format(handler.name))
all_extensions.append((None, handler))
all_extensions.sort(key=self.__get_dependency_level)
return all_extensions
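# Illustrative sketch (hypothetical data): extensions are sorted by ascending
# dependency level, so the caller can process dependencies first and read the
# maximum level from the last element.
#
#   pairs = [("extC", 2), ("extA", 0), ("extB", 1)]
#   pairs.sort(key=lambda tup: tup[1])
#   # -> [("extA", 0), ("extB", 1), ("extC", 2)]; max level == pairs[-1][1] == 2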
def handle_ext_handlers(self, goal_state_id):
if not self.ext_handlers:
logger.info("No extension handlers found, not processing anything.")
return
wait_until = datetime.datetime.utcnow() + datetime.timedelta(minutes=_DEFAULT_EXT_TIMEOUT_MINUTES)
all_extensions = self.__get_sorted_extensions_for_processing()
# Since all_extensions are sorted based on sort_key, the last element would be the maximum based on the sort_key
max_dep_level = self.__get_dependency_level(all_extensions[-1]) if any(all_extensions) else 0
depends_on_err_msg = None
extensions_enabled = conf.get_extensions_enabled()
for extension, ext_handler in all_extensions:
handler_i = ExtHandlerInstance(ext_handler, self.protocol, extension=extension)
# When extension processing is disabled, we skip processing the extensions. But CRP is still waiting for some
# status back for the skipped extensions. In order to propagate the status back to CRP, we report status back
# here with an error message.
if not extensions_enabled:
agent_conf_file_path = get_osutil().agent_conf_file_path
msg = "Extension will not be processed since extension processing is disabled. To enable extension " \
"processing, set Extensions.Enabled=y in '{0}'".format(agent_conf_file_path)
ext_full_name = handler_i.get_extension_full_name(extension)
logger.info('')
logger.info("{0}: {1}".format(ext_full_name, msg))
add_event(op=WALAEventOperation.ExtensionProcessing, message="{0}: {1}".format(ext_full_name, msg))
handler_i.set_handler_status(status=ExtHandlerStatusValue.not_ready, message=msg, code=-1)
handler_i.create_status_file_if_not_exist(extension,
status=ExtensionStatusValue.error,
code=-1,
operation=handler_i.operation,
message=msg)
continue
# In case of depends-on errors, we skip processing extensions if there was an error processing dependent extensions.
# But CRP is still waiting for some status back for the skipped extensions. In order to propagate the status back to CRP,
# we will report status back here with the relevant error message for each of the dependent extension.
if depends_on_err_msg is not None:
# For MC extensions, report the HandlerStatus as is and create a new placeholder per extension if one doesn't exist
if handler_i.should_perform_multi_config_op(extension):
# Ensure some handler status exists for the Handler, if not, set it here
if handler_i.get_handler_status() is None:
handler_i.set_handler_status(message=depends_on_err_msg, code=-1)
handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=-1,
operation=WALAEventOperation.ExtensionProcessing,
message=depends_on_err_msg)
# For SC extensions, overwrite the HandlerStatus with the relevant message
else:
handler_i.set_handler_status(message=depends_on_err_msg, code=-1)
continue
# Process extensions and get if it was successfully executed or not
extension_success = self.handle_ext_handler(handler_i, extension, goal_state_id)
dep_level = self.__get_dependency_level((extension, ext_handler))
if 0 <= dep_level < max_dep_level:
extension_full_name = handler_i.get_extension_full_name(extension)
try:
# Do no wait for extension status if the handler failed
if not extension_success:
raise Exception("Skipping processing of extensions since execution of dependent extension {0} failed".format(
extension_full_name))
# Wait for the extension installation until it is handled.
# This is done for install and enable, not for uninstall.
# If handled successfully, proceed with the current handler.
# Otherwise, skip the rest of the extension installation.
self.wait_for_handler_completion(handler_i, wait_until, extension=extension)
except Exception as error:
logger.warn(
"Dependent extension {0} failed or timed out, will skip processing the rest of the extensions".format(
extension_full_name))
depends_on_err_msg = ustr(error)
add_event(name=extension_full_name,
version=handler_i.ext_handler.version,
op=WALAEventOperation.ExtensionProcessing,
is_success=False,
message=depends_on_err_msg)
@staticmethod
def wait_for_handler_completion(handler_i, wait_until, extension=None):
"""
Check the status of the extension being handled. Wait until it has a terminal state or times out.
:raises: Exception if it is not handled successfully.
"""
extension_name = handler_i.get_extension_full_name(extension)
# If the handler had no settings, we should not wait at all for handler to report status.
if extension is None:
logger.info("No settings found for {0}, not waiting for it's status".format(extension_name))
return
try:
ext_completed, status = False, None
# Keep polling for the extension status until it succeeds or times out
while datetime.datetime.utcnow() <= wait_until:
ext_completed, status = handler_i.is_ext_handling_complete(extension)
if ext_completed:
break
time.sleep(5)
except Exception as e:
msg = "Failed to wait for Handler completion due to unknown error. Marking the dependent extension as failed: {0}, {1}".format(
extension_name, textutil.format_exception(e))
raise Exception(msg)
# In case of timeout or terminal error state, we log it and raise
# In case the extension reported status at the last second, we should prioritize reporting status over timeout
if not ext_completed and datetime.datetime.utcnow() > wait_until:
msg = "Dependent Extension {0} did not reach a terminal state within the allowed timeout. Last status was {1}".format(
extension_name, status)
raise Exception(msg)
if status != ExtensionStatusValue.success:
msg = "Dependent Extension {0} did not succeed. Status was {1}".format(extension_name, status)
raise Exception(msg)
def handle_ext_handler(self, ext_handler_i, extension, goal_state_id):
"""
Execute the requested command for the handler and return if success
:param ext_handler_i: The ExtHandlerInstance object to execute the command on
:param extension: The extension settings on which to run the command on
:param goal_state_id: ID of the current GoalState
:return: True if the operation was successful, False if not
"""
try:
# Ensure the extension config was valid
if ext_handler_i.ext_handler.is_invalid_setting:
raise ExtensionsGoalStateError(ext_handler_i.ext_handler.invalid_setting_reason)
handler_state = ext_handler_i.ext_handler.state
# The Guest Agent currently only supports 1 installed version per extension on the VM.
# If the extension version is unregistered and the customer wants to uninstall the extension,
# we should let it go through even if the installed version doesn't exist in Handler manifest (PIR) anymore.
# If target state is enabled and version not found in manifest, do not process the extension.
if ext_handler_i.decide_version(target_state=handler_state,
extension=extension) is None and handler_state == ExtensionRequestedState.Enabled:
handler_version = ext_handler_i.ext_handler.version
name = ext_handler_i.ext_handler.name
err_msg = "Unable to find version {0} in manifest for extension {1}".format(handler_version, name)
ext_handler_i.set_operation(WALAEventOperation.Download)
raise ExtensionError(msg=err_msg)
# Handle everything on an extension level rather than Handler level
ext_handler_i.logger.info("Target handler state: {0} [{1}]", handler_state, goal_state_id)
if handler_state == ExtensionRequestedState.Enabled:
self.handle_enable(ext_handler_i, extension)
elif handler_state == ExtensionRequestedState.Disabled:
self.handle_disable(ext_handler_i, extension)
elif handler_state == ExtensionRequestedState.Uninstall:
self.handle_uninstall(ext_handler_i, extension=extension)
else:
message = u"Unknown ext handler state:{0}".format(handler_state)
raise ExtensionError(message)
return True
except MultiConfigExtensionEnableError as error:
ext_name = ext_handler_i.get_extension_full_name(extension)
err_msg = "Error processing MultiConfig extension {0}: {1}".format(ext_name, ustr(error))
# This error is only thrown for enable operation on MultiConfig extension.
# Since these are maintained by the extensions, the expectation here is that they would update their status files appropriately with their errors.
# The extensions should already have a placeholder status file, but in case they don't, setting one here to fail fast.
ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error.code,
operation=ext_handler_i.operation, message=err_msg)
add_event(name=ext_name, version=ext_handler_i.ext_handler.version, op=ext_handler_i.operation,
is_success=False, log_event=True, message=err_msg)
except ExtensionsGoalStateError as error:
# Catch and report Invalid ExtensionConfig errors here to fail fast rather than timing out after 90 min
err_msg = "Ran into config errors: {0}. \nPlease retry again as another operation with updated settings".format(
ustr(error))
self.__handle_and_report_ext_handler_errors(ext_handler_i, error,
report_op=WALAEventOperation.InvalidExtensionConfig,
message=err_msg, extension=extension)
except ExtensionUpdateError as error:
# Not reporting the error as it has already been reported from the old version
self.__handle_and_report_ext_handler_errors(ext_handler_i, error, ext_handler_i.operation, ustr(error),
report=False, extension=extension)
except ExtensionDownloadError as error:
msg = "Failed to download artifacts: {0}".format(ustr(error))
self.__handle_and_report_ext_handler_errors(ext_handler_i, error, report_op=WALAEventOperation.Download,
message=msg, extension=extension)
except ExtensionError as error:
self.__handle_and_report_ext_handler_errors(ext_handler_i, error, ext_handler_i.operation, ustr(error),
extension=extension)
except Exception as error:
error.code = -1
self.__handle_and_report_ext_handler_errors(ext_handler_i, error, ext_handler_i.operation, ustr(error),
extension=extension)
return False
@staticmethod
def __handle_and_report_ext_handler_errors(ext_handler_i, error, report_op, message, report=True, extension=None):
# This function is only called for Handler level errors; we capture MultiConfig errors separately,
# so report only HandlerStatus here.
ext_handler_i.set_handler_status(message=message, code=error.code)
# If the handler supports multi-config, create a status file with failed status if no status file exists.
# This is for correctly reporting errors back to CRP for failed Handler level operations for MultiConfig extensions.
# In case of Handler failures, we will retry each time for each extension, so we need to create a status
# file with failure since the extensions won't be called where they can create their status files.
# This way we guarantee reporting back to CRP
if ext_handler_i.should_perform_multi_config_op(extension):
ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error.code,
operation=report_op, message=message)
if report:
name = ext_handler_i.get_extension_full_name(extension)
handler_version = ext_handler_i.ext_handler.version
add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True,
message=message)
def handle_enable(self, ext_handler_i, extension):
"""
1- Ensure the handler is installed
2- Check if extension is enabled or disabled and then process accordingly
"""
uninstall_exit_code = None
old_ext_handler_i = ext_handler_i.get_installed_ext_handler()
current_handler_state = ext_handler_i.get_handler_state()
ext_handler_i.logger.info("[Enable] current handler state is: {0}", current_handler_state.lower())
# We go through the entire process of downloading and initializing the extension if it's either a fresh
# extension or if it's a retry of a previously failed upgrade.
if current_handler_state == ExtHandlerState.NotInstalled or current_handler_state == ExtHandlerState.FailedUpgrade:
self.__setup_new_handler(ext_handler_i, extension)
if old_ext_handler_i is None:
ext_handler_i.install(extension=extension)
elif ext_handler_i.version_ne(old_ext_handler_i):
# This is a special case, we need to update the handler version here but to do that we need to also
# disable each enabled extension of this handler.
uninstall_exit_code = ExtHandlersHandler._update_extension_handler_and_return_if_failed(
old_ext_handler_i, ext_handler_i, extension)
else:
ext_handler_i.ensure_consistent_data_for_mc()
ext_handler_i.update_settings(extension)
self.__handle_extension(ext_handler_i, extension, uninstall_exit_code)
@staticmethod
def __setup_new_handler(ext_handler_i, extension):
ext_handler_i.set_handler_state(ExtHandlerState.NotInstalled)
ext_handler_i.download()
ext_handler_i.initialize()
ext_handler_i.update_settings(extension)
@staticmethod
def __handle_extension(ext_handler_i, extension, uninstall_exit_code):
# Check if extension level settings provided for the handler, if not, call enable for the handler.
# This is legacy behavior, we can have handlers with no settings.
if extension is None:
ext_handler_i.enable()
return
# MultiConfig: Handle extension level ops here
ext_handler_i.logger.info("Requested extension state: {0}", extension.state)
if extension.state == ExtensionState.Enabled:
ext_handler_i.enable(extension, uninstall_exit_code=uninstall_exit_code)
elif extension.state == ExtensionState.Disabled:
# Only disable the extension if the requested state == Disabled and the current state != Disabled
if ext_handler_i.get_extension_state(extension) != ExtensionState.Disabled:
# Extensions can only be disabled for Multi Config extensions. Disabling an extension is
# tantamount to uninstalling a Handler, so we ignore errors in case of Disable failure and delete the state.
ext_handler_i.disable(extension, ignore_error=True)
else:
ext_handler_i.logger.info("Extension already disabled, not doing anything")
else:
raise ExtensionsGoalStateError(
"Unknown requested state for Extension {0}: {1}".format(extension.name, extension.state))
@staticmethod
def _update_extension_handler_and_return_if_failed(old_ext_handler_i, ext_handler_i, extension=None):
def execute_old_handler_command_and_return_if_succeeds(func):
"""
A common wrapper to execute all commands that need to be executed from the old handler
so that they share a common exception handling mechanism
:param func: The command to be executed on the old handler
:return: 0 if the command succeeds; the failing command's exit code if it fails and
ContinueOnUpdateFailure is set (otherwise an ExtensionUpdateError is raised)
"""
continue_on_update_failure = False
exit_code = 0
try:
continue_on_update_failure = ext_handler_i.load_manifest().is_continue_on_update_failure()
func()
except ExtensionError as e:
# Reporting the event with the old handler and raising a new ExtensionUpdateError to set the
# handler status on the new version
msg = "%s; ContinueOnUpdate: %s" % (ustr(e), continue_on_update_failure)
old_ext_handler_i.report_event(message=msg, is_success=False)
if not continue_on_update_failure:
raise ExtensionUpdateError(msg)
exit_code = e.code
if isinstance(e, ExtensionOperationError):
exit_code = e.exit_code # pylint: disable=E1101
logger.info("Continue on Update failure flag is set, proceeding with update")
return exit_code
disable_exit_codes = defaultdict(lambda: NOT_RUN)
# We only want to disable the old handler if it is currently enabled; no other state makes sense.
if old_ext_handler_i.get_handler_state() == ExtHandlerState.Enabled:
# Corner case - If the old handler is a Single config Handler with no extensions at all,
# we should just disable the handler
if not old_ext_handler_i.supports_multi_config and not any(old_ext_handler_i.extensions):
disable_exit_codes[
old_ext_handler_i.ext_handler.name] = execute_old_handler_command_and_return_if_succeeds(
func=partial(old_ext_handler_i.disable, extension=None))
# Else we disable all enabled extensions of this handler
# Note: If MC is supported this will disable only enabled_extensions else it will disable all extensions
for old_ext in old_ext_handler_i.enabled_extensions:
disable_exit_codes[old_ext.name] = execute_old_handler_command_and_return_if_succeeds(
func=partial(old_ext_handler_i.disable, extension=old_ext))
ext_handler_i.copy_status_files(old_ext_handler_i)
if ext_handler_i.version_gt(old_ext_handler_i):
ext_handler_i.update(disable_exit_codes=disable_exit_codes,
updating_from_version=old_ext_handler_i.ext_handler.version,
extension=extension)
else:
updating_from_version = ext_handler_i.ext_handler.version
old_ext_handler_i.update(handler_version=updating_from_version,
disable_exit_codes=disable_exit_codes, updating_from_version=updating_from_version,
extension=extension)
uninstall_exit_code = execute_old_handler_command_and_return_if_succeeds(
func=partial(old_ext_handler_i.uninstall, extension=extension))
old_ext_handler_i.remove_ext_handler()
ext_handler_i.update_with_install(uninstall_exit_code=uninstall_exit_code, extension=extension)
return uninstall_exit_code
def handle_disable(self, ext_handler_i, extension=None):
"""
Disable is a legacy behavior, CRP doesn't support it; it's only for XML based extensions.
In case we get a disable request, just disable that extension.
"""
handler_state = ext_handler_i.get_handler_state()
ext_handler_i.logger.info("[Disable] current handler state is: {0}", handler_state.lower())
if handler_state == ExtHandlerState.Enabled:
ext_handler_i.disable(extension)
def handle_uninstall(self, ext_handler_i, extension):
"""
To Uninstall the handler, first ensure all extensions are disabled
1- Disable all enabled extensions first if Handler is Enabled and then Disable the handler
(disabled extensions won't have any extensions dependent on them so we can just go
ahead and remove all of them at once if HandlerState==Uninstall.
CRP will only set the HandlerState to Uninstall if all its extensions are set to be disabled)
2- Finally uninstall the handler
"""
handler_state = ext_handler_i.get_handler_state()
ext_handler_i.logger.info("[Uninstall] current handler state is: {0}", handler_state.lower())
if handler_state != ExtHandlerState.NotInstalled:
if handler_state == ExtHandlerState.Enabled:
# Corner case - Single config Handler with no extensions at all
# If there are no extension settings for Handler, we should just disable the handler
if not ext_handler_i.supports_multi_config and not any(ext_handler_i.extensions):
ext_handler_i.disable()
# If Handler is Enabled, there should be at least 1 enabled extension for the handler
# Note: If MC is supported this will disable only enabled_extensions else it will disable all extensions
for enabled_ext in ext_handler_i.enabled_extensions:
ext_handler_i.disable(enabled_ext)
# Try uninstalling the extension and swallow any exceptions in case of failures after logging them
try:
ext_handler_i.uninstall(extension=extension)
except ExtensionError as e:
ext_handler_i.report_event(message=ustr(e), is_success=False)
ext_handler_i.remove_ext_handler()
def __get_handlers_on_file_system(self, goal_state_changed):
handlers_to_report = []
# Ignoring the `history` and `events` directories as they're not handlers and are agent-generated
for item, path in list_agent_lib_directory(skip_agent_package=True,
ignore_names=[EVENTS_DIRECTORY, ARCHIVE_DIRECTORY_NAME]):
try:
handler_instance = ExtHandlersHandler.get_ext_handler_instance_from_path(name=item,
path=path,
protocol=self.protocol)
if handler_instance is not None:
ext_handler = handler_instance.ext_handler
# For each handler we need to add extensions to report their status.
# For Single Config, we just need to add one extension with name as Handler Name
# For Multi Config, walk the config directory and find all unique extension names
# and add them as extensions to the handler.
extensions_names = set()
# Settings for Multi Config are saved as <extName>.<seqNo>.settings.
# Use this pattern to determine if Handler supports Multi Config or not and add extensions
for settings_path in glob.iglob(os.path.join(handler_instance.get_conf_dir(), "*.*.settings")):
match = re.search("(?P\\w+)\\.\\d+\\.settings", settings_path)
if match is not None:
extensions_names.add(match.group("extname"))
ext_handler.supports_multi_config = True
# If nothing found with that pattern then it's a Single Config, add an extension with Handler Name
if not any(extensions_names):
extensions_names.add(ext_handler.name)
for ext_name in extensions_names:
ext = ExtensionSettings(name=ext_name)
# Fetch the last modified sequence number
seq_no, _ = handler_instance.get_status_file_path(ext)
ext.sequenceNumber = seq_no
# Append extension to the list of extensions for the handler
ext_handler.settings.append(ext)
handlers_to_report.append(ext_handler)
except Exception as error:
# Log error once per goal state
if goal_state_changed:
logger.warn("Can't fetch ExtHandler from path: {0}; Error: {1}".format(path, ustr(error)))
return handlers_to_report
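# Illustrative sketch of the Multi-Config detection above (hypothetical file
# names; mirrors the <extname>.<seq_no>.settings pattern):
#
#   import re
#   m = re.search(r"(?P<extname>\w+)\.\d+\.settings", "firstRunCommand.2.settings")
#   m.group("extname")  # -> "firstRunCommand", so the handler is Multi-Config
#   # "2.settings" (Single Config) has no <extname> component and does not match.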
def report_ext_handlers_status(self, goal_state_changed=False, vm_agent_update_status=None,
vm_agent_supports_fast_track=False):
"""
Go through handler_state dir, collect and report status.
Returns the status it reported, or None if an error occurred.
"""
try:
vm_status = VMStatus(status="Ready", message="Guest Agent is running",
gs_aggregate_status=self.__gs_aggregate_status,
vm_agent_update_status=vm_agent_update_status)
vm_status.vmAgent.set_supports_fast_track(vm_agent_supports_fast_track)
handlers_to_report = []
# In case of Unsupported error, report the status of the handlers in the VM
if self.__last_gs_unsupported():
handlers_to_report = self.__get_handlers_on_file_system(goal_state_changed)
# If GoalState supported, report the status of extension handlers that were requested by the GoalState
elif not self.__last_gs_unsupported() and self.ext_handlers is not None:
handlers_to_report = self.ext_handlers
for ext_handler in handlers_to_report:
try:
self.report_ext_handler_status(vm_status, ext_handler, goal_state_changed)
except ExtensionError as error:
add_event(op=WALAEventOperation.ExtensionProcessing, is_success=False, message=ustr(error))
logger.verbose("Report vm agent status")
try:
self.protocol.report_vm_status(vm_status)
logger.verbose("Completed vm agent status report successfully")
self.report_status_error_state.reset()
except ProtocolNotFoundError as error:
self.report_status_error_state.incr()
message = "Failed to report vm agent status: {0}".format(error)
logger.verbose(message)
except ProtocolError as error:
self.report_status_error_state.incr()
message = "Failed to report vm agent status: {0}".format(error)
add_event(AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.ExtensionProcessing,
is_success=False,
message=message)
if self.report_status_error_state.is_triggered():
message = "Failed to report vm agent status for more than {0}" \
.format(self.report_status_error_state.min_timedelta)
add_event(AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.ReportStatusExtended,
is_success=False,
message=message)
self.report_status_error_state.reset()
return vm_status
except Exception as error:
msg = u"Failed to report status: {0}".format(textutil.format_exception(error))
logger.warn(msg)
add_event(AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.ReportStatus,
is_success=False,
message=msg)
return None
def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed):
ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol)
handler_status = ext_handler_i.get_handler_status()
# If nothing available, skip reporting
if handler_status is None:
# We should always have some handler status if requested state != Uninstall, irrespective of single or
# multi-config. If state != Uninstall, report an error
if ext_handler.state != ExtensionRequestedState.Uninstall:
msg = "No handler status found for {0}. Not reporting anything for it.".format(ext_handler.name)
ext_handler_i.report_error_on_incarnation_change(goal_state_changed, log_msg=msg, event_msg=msg)
return
handler_state = ext_handler_i.get_handler_state()
ext_handler_statuses = []
# For MultiConfig, we need to report status per extension even for Handler level failures.
# If we have HandlerStatus for a MultiConfig handler and GS is requesting for it, we would report status per
# extension even if HandlerState == NotInstalled (Sample scenario: ExtensionsGoalStateError, DecideVersionError, etc)
# We also need to report extension status for an uninstalled handler if extensions are disabled because CRP
# waits for extension runtime status before failing the extension operation.
if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled():
# Since we require reading the Manifest for reading the heartbeat, this would fail if HandlerManifest not found.
# Only try to read heartbeat if HandlerState != NotInstalled.
if handler_state != ExtHandlerState.NotInstalled:
# Heartbeat is a handler level thing only, so we don't need to modify this
try:
heartbeat = ext_handler_i.collect_heartbeat()
if heartbeat is not None:
handler_status.status = heartbeat.get('status')
if 'formattedMessage' in heartbeat:
handler_status.message = parse_formatted_message(heartbeat.get('formattedMessage'))
except ExtensionError as e:
ext_handler_i.set_handler_status(message=ustr(e), code=e.code)
ext_handler_statuses = ext_handler_i.get_extension_handler_statuses(handler_status, goal_state_changed)
# If no extension status was reported, report the Handler status
if not any(ext_handler_statuses):
ext_handler_statuses.append(handler_status)
vm_status.vmAgent.extensionHandlers.extend(ext_handler_statuses)
class ExtHandlerInstance(object):
def __init__(self, ext_handler, protocol, execution_log_max_size=(10 * 1024 * 1024), extension=None):
self.ext_handler = ext_handler
self.protocol = protocol
self.operation = None
self.pkg = None
self.pkg_file = None
self.logger = None
self.set_logger(extension=extension, execution_log_max_size=execution_log_max_size)
@property
def supports_multi_config(self):
return self.ext_handler.supports_multi_config
@property
def extensions(self):
return self.ext_handler.settings
@property
def enabled_extensions(self):
"""
In case of Single config, just return all the extensions of the handler
(expectation being that there'll only be a single extension per handler).
We will not be maintaining extension level state for Single config Handlers
"""
if self.supports_multi_config:
return [ext for ext in self.extensions if self.get_extension_state(ext) == ExtensionState.Enabled]
return self.extensions
def get_extension_full_name(self, extension=None):
"""
Get the full name of the extension <HandlerName>.<ExtensionName>
:param extension: The requested extension
:return: <HandlerName> if MultiConfig not supported or extension == None, else <HandlerName>.<ExtensionName>
"""
if self.should_perform_multi_config_op(extension):
return "{0}.{1}".format(self.ext_handler.name, extension.name)
return self.ext_handler.name
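# Illustrative example (hypothetical names): for a Multi-Config handler named
# "Example.MCHandler" with an extension named "cmd1", get_extension_full_name
# returns "Example.MCHandler.cmd1"; for Single Config (or extension == None)
# it returns just the handler name.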
def __set_command_execution_log(self, extension, execution_log_max_size):
try:
fileutil.mkdir(self.get_log_dir(), mode=0o755, reset_mode_and_owner=False)
except IOError as e:
self.logger.error(u"Failed to create extension log dir: {0}", e)
else:
log_file_name = "CommandExecution.log" if not self.should_perform_multi_config_op(
extension) else "CommandExecution_{0}.log".format(extension.name)
log_file = os.path.join(self.get_log_dir(), log_file_name)
self.__truncate_file_head(log_file, execution_log_max_size, self.get_extension_full_name(extension))
self.logger.add_appender(logger.AppenderType.FILE, logger.LogLevel.INFO, log_file)
@staticmethod
def __truncate_file_head(filename, max_size, extension_name):
try:
if os.stat(filename).st_size <= max_size:
return
with open(filename, "rb") as existing_file:
existing_file.seek(-1 * max_size, 2)
_ = existing_file.readline()
with open(filename + ".tmp", "wb") as tmp_file:
shutil.copyfileobj(existing_file, tmp_file)
os.rename(filename + ".tmp", filename)
except (IOError, OSError) as e:
if is_file_not_found_error(e):
# If CommandExecution.log does not exist, it's not noteworthy;
# this just means that no extension with self.ext_handler.name is
# installed.
return
logger.error(
"Exception occurred while attempting to truncate {0} for extension {1}. Exception is: {2}",
filename, extension_name, ustr(e))
for f in (filename, filename + ".tmp"):
try:
os.remove(f)
except (IOError, OSError) as cleanup_exception:
if is_file_not_found_error(cleanup_exception):
logger.info("File '{0}' does not exist.", f)
else:
logger.warn("Exception occurred while attempting to remove file '{0}': {1}", f,
cleanup_exception)
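# Illustrative sketch of the truncation above: seek(-1 * max_size, 2) positions
# the file max_size bytes before EOF, readline() drops the (likely partial)
# first line, and the remaining tail replaces the original file. E.g., with a
# 15 MB log and max_size of 10 MB, roughly the newest 10 MB survive.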
def decide_version(self, target_state=None, extension=None):
self.logger.verbose("Decide which version to use")
try:
manifest = self.protocol.get_goal_state().fetch_extension_manifest(self.ext_handler.name, self.ext_handler.manifest_uris)
pkg_list = manifest.pkg_list
except ProtocolError as e:
raise ExtensionError("Failed to get ext handler pkgs", e)
except ExtensionDownloadError:
self.set_operation(WALAEventOperation.Download)
raise
# Determine the desired and installed versions
requested_version = FlexibleVersion(str(self.ext_handler.version))
installed_version_string = self.get_installed_version()
installed_version = requested_version if installed_version_string is None else FlexibleVersion(installed_version_string)
# Divide packages
# - Find the installed package (its version must exactly match)
# - Find the internal candidate (its version must exactly match)
# - Separate the public packages
selected_pkg = None
installed_pkg = None
pkg_list.versions.sort(key=lambda p: FlexibleVersion(p.version))
for pkg in pkg_list.versions:
pkg_version = FlexibleVersion(pkg.version)
if pkg_version == installed_version:
installed_pkg = pkg
if requested_version.matches(pkg_version):
selected_pkg = pkg
# Finally, update the version only if not downgrading
# Note:
# - A downgrade, which will be bound to the same major version,
# is allowed if the installed version is no longer available
if target_state in (ExtensionRequestedState.Uninstall, ExtensionRequestedState.Disabled):
if installed_pkg is None:
msg = "Failed to find installed version: {0} of Handler: {1} in handler manifest to uninstall.".format(
installed_version, self.ext_handler.name)
self.logger.warn(msg)
self.pkg = installed_pkg
self.ext_handler.version = str(installed_version) \
if installed_version is not None else None
else:
self.pkg = selected_pkg
if self.pkg is not None:
self.ext_handler.version = str(selected_pkg.version)
if self.pkg is not None:
self.logger.verbose("Use version: {0}", self.pkg.version)
# We reset the logger here in case the handler version changes
if not requested_version.matches(FlexibleVersion(self.ext_handler.version)):
self.set_logger(extension=extension)
return self.pkg
def set_logger(self, execution_log_max_size=(10 * 1024 * 1024), extension=None):
prefix = "[{0}]".format(self.get_full_name(extension))
self.logger = logger.Logger(logger.DEFAULT_LOGGER, prefix)
self.__set_command_execution_log(extension, execution_log_max_size)
def version_gt(self, other):
self_version = self.ext_handler.version
other_version = other.ext_handler.version
return FlexibleVersion(self_version) > FlexibleVersion(other_version)
def version_ne(self, other):
self_version = self.ext_handler.version
other_version = other.ext_handler.version
return FlexibleVersion(self_version) != FlexibleVersion(other_version)
def get_installed_ext_handler(self):
latest_version = self.get_installed_version()
if latest_version is None:
return None
installed_handler = copy.deepcopy(self.ext_handler)
installed_handler.version = latest_version
return ExtHandlerInstance(installed_handler, self.protocol)
def get_installed_version(self):
latest_version = None
for path in glob.iglob(os.path.join(conf.get_lib_dir(), self.ext_handler.name + "-*")):
if not os.path.isdir(path):
continue
separator = path.rfind('-')
version_from_path = FlexibleVersion(path[separator + 1:])
state_path = os.path.join(path, 'config', 'HandlerState')
if not os.path.exists(state_path) or fileutil.read_file(state_path) == ExtHandlerState.NotInstalled \
or fileutil.read_file(state_path) == ExtHandlerState.FailedUpgrade:
logger.verbose("Ignoring version of uninstalled or failed extension: {0}".format(path))
continue
if latest_version is None or latest_version < version_from_path:
latest_version = version_from_path
return str(latest_version) if latest_version is not None else None
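# Illustrative sketch (hypothetical directories): given lib dir entries
# "Example.Ext-1.0.0" (HandlerState == Enabled) and "Example.Ext-1.1.0"
# (HandlerState == NotInstalled), get_installed_version returns "1.0.0",
# since uninstalled and failed-upgrade versions are skipped.
#
#   path = "/var/lib/waagent/Example.Ext-1.1.0"
#   path[path.rfind('-') + 1:]  # -> "1.1.0"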
def copy_status_files(self, old_ext_handler_i):
self.logger.info("Copy status files from old plugin to new")
old_ext_dir = old_ext_handler_i.get_base_dir()
new_ext_dir = self.get_base_dir()
old_ext_mrseq_file = os.path.join(old_ext_dir, "mrseq")
if os.path.isfile(old_ext_mrseq_file):
logger.info("Migrating {0} to {1}.", old_ext_mrseq_file, new_ext_dir)
shutil.copy2(old_ext_mrseq_file, new_ext_dir)
else:
logger.info("{0} does not exist, no migration is needed.", old_ext_mrseq_file)
old_ext_status_dir = old_ext_handler_i.get_status_dir()
new_ext_status_dir = self.get_status_dir()
if os.path.isdir(old_ext_status_dir):
for status_file in os.listdir(old_ext_status_dir):
status_file = os.path.join(old_ext_status_dir, status_file)
if os.path.isfile(status_file):
shutil.copy2(status_file, new_ext_status_dir)
def set_operation(self, op):
self.operation = op
def report_event(self, name=None, message="", is_success=True, duration=0, log_event=True):
ext_handler_version = self.ext_handler.version
name = self.ext_handler.name if name is None else name
add_event(name=name, version=ext_handler_version, message=message,
op=self.operation, is_success=is_success, duration=duration, log_event=log_event)
def _unzip_extension_package(self, source_file, target_directory):
self.logger.info("Unzipping extension package: {0}", source_file)
try:
zipfile.ZipFile(source_file).extractall(target_directory)
except Exception as exception:
logger.info("Error while unzipping extension package: {0}", ustr(exception))
os.remove(source_file)
if os.path.exists(target_directory):
shutil.rmtree(target_directory)
return False
return True
def download(self):
begin_utc = datetime.datetime.utcnow()
self.set_operation(WALAEventOperation.Download)
if self.pkg is None or self.pkg.uris is None or len(self.pkg.uris) == 0:
raise ExtensionDownloadError("No package uri found")
package_file = os.path.join(conf.get_lib_dir(), self.get_extension_package_zipfile_name())
package_exists = False
if os.path.exists(package_file):
self.logger.info("Using existing extension package: {0}", package_file)
if self._unzip_extension_package(package_file, self.get_base_dir()):
package_exists = True
else:
self.logger.info("The existing extension package is invalid, will ignore it.")
if not package_exists:
is_fast_track_goal_state = self.protocol.get_goal_state().extensions_goal_state.source == GoalStateSource.FastTrack
self.protocol.client.download_zip_package("extension package", self.pkg.uris, package_file, self.get_base_dir(), use_verify_header=is_fast_track_goal_state)
self.report_event(message="Download succeeded", duration=elapsed_milliseconds(begin_utc))
self.pkg_file = package_file
def ensure_consistent_data_for_mc(self):
# If CRP expects Handler to support MC, ensure the HandlerManifest also reflects that.
# Even though the HandlerManifest.json is not expected to change once the extension is installed,
# CRP can wrongfully send a Multi-Config GoalState even if the Handler supports only Single Config.
# Checking this only if HandlerState == Enable. In case of Uninstall, we don't care.
if self.supports_multi_config and not self.load_manifest().supports_multiple_extensions():
raise ExtensionsGoalStateError(
"Handler {0} does not support MultiConfig but CRP expects it, failing due to inconsistent data".format(
self.ext_handler.name))
def initialize(self):
self.logger.info("Initializing extension {0}".format(self.get_full_name()))
# Add user execute permission to all files under the base dir
for file in fileutil.get_all_files(self.get_base_dir()): # pylint: disable=redefined-builtin
fileutil.chmod(file, os.stat(file).st_mode | stat.S_IXUSR)
# Save HandlerManifest.json
man_file = fileutil.search_file(self.get_base_dir(), 'HandlerManifest.json')
if man_file is None:
raise ExtensionDownloadError("HandlerManifest.json not found")
try:
man = fileutil.read_file(man_file, remove_bom=True)
fileutil.write_file(self.get_manifest_file(), man)
except IOError as e:
fileutil.clean_ioerror(e, paths=[self.get_base_dir(), self.pkg_file])
raise ExtensionDownloadError(u"Failed to save HandlerManifest.json", e)
self.ensure_consistent_data_for_mc()
# Create status and config dir
try:
status_dir = self.get_status_dir()
fileutil.mkdir(status_dir, mode=0o700)
conf_dir = self.get_conf_dir()
fileutil.mkdir(conf_dir, mode=0o700)
if get_supported_feature_by_name(SupportedFeatureNames.ExtensionTelemetryPipeline).is_supported:
fileutil.mkdir(self.get_extension_events_dir(), mode=0o700)
except IOError as e:
fileutil.clean_ioerror(e, paths=[self.get_base_dir(), self.pkg_file])
raise ExtensionDownloadError(u"Failed to initialize extension '{0}'".format(self.get_full_name()), e)
# Save HandlerEnvironment.json
self.create_handler_env()
self.set_extension_resource_limits()
def set_extension_resource_limits(self):
extension_name = self.get_full_name()
# Set up the resource limits for extension operations and its services.
man = self.load_manifest()
resource_limits = man.get_resource_limits(extension_name, self.ext_handler.version)
if not CGroupConfigurator.get_instance().is_extension_resource_limits_setup_completed(extension_name,
cpu_quota=resource_limits.get_extension_slice_cpu_quota()):
CGroupConfigurator.get_instance().setup_extension_slice(
extension_name=extension_name, cpu_quota=resource_limits.get_extension_slice_cpu_quota())
CGroupConfigurator.get_instance().set_extension_services_cpu_memory_quota(resource_limits.get_service_list())
def create_status_file_if_not_exist(self, extension, status, code, operation, message):
_, status_path = self.get_status_file_path(extension)
if status_path is not None and not os.path.exists(status_path):
now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
status_contents = [
{
"version": 1.0,
"timestampUTC": now,
"status": {
"name": self.get_extension_full_name(extension),
"operation": operation,
"status": status,
"code": code,
"formattedMessage": {
"lang": "en-US",
"message": message
}
}
}
]
# Create status directory if not exists. This is needed in the case where the Handler fails before even
# initializing the directories (ExtensionsGoalStateError, Version deleted from PIR error, etc)
if not os.path.exists(os.path.dirname(status_path)):
fileutil.mkdir(os.path.dirname(status_path), mode=0o700)
self.logger.info("Creating a placeholder status file {0} with status: {1}".format(status_path, status))
fileutil.write_file(status_path, json.dumps(status_contents))
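# Illustrative example (hypothetical values): for a Multi-Config extension
# named "cmd1" at sequence number 0 the placeholder is written to
# <status_dir>/cmd1.0.status; for Single Config it is written to
# <status_dir>/0.status (see get_status_file_path below).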
def enable(self, extension=None, uninstall_exit_code=None):
try:
self._enable_extension(extension, uninstall_exit_code)
except ExtensionError as error:
if self.should_perform_multi_config_op(extension):
raise MultiConfigExtensionEnableError(error)
raise
# Even if a single extension is enabled for this handler, set the Handler state as Enabled
self.set_handler_state(ExtHandlerState.Enabled)
self.set_handler_status(status=ExtHandlerStatusValue.ready, message="Plugin enabled")
def should_perform_multi_config_op(self, extension):
return self.supports_multi_config and extension is not None
def _enable_extension(self, extension, uninstall_exit_code):
uninstall_exit_code = str(uninstall_exit_code) if uninstall_exit_code is not None else NOT_RUN
env = {
ExtCommandEnvVariable.UninstallReturnCode: uninstall_exit_code
}
# Call setup here in case the extension is already installed but setup has not been called before
self.set_extension_resource_limits()
self.set_operation(WALAEventOperation.Enable)
man = self.load_manifest()
enable_cmd = man.get_enable_command()
self.logger.info("Enable extension: [{0}]".format(enable_cmd))
self.launch_command(enable_cmd, cmd_name="enable", timeout=300,
extension_error_code=ExtensionErrorCodes.PluginEnableProcessingFailed, env=env,
extension=extension)
if self.should_perform_multi_config_op(extension):
# Only save extension state if MC supported
self.__set_extension_state(extension, ExtensionState.Enabled)
# start tracking the extension services cgroup.
resource_limits = man.get_resource_limits(self.get_full_name(), self.ext_handler.version)
CGroupConfigurator.get_instance().start_tracking_extension_services_cgroups(
resource_limits.get_service_list())
def _disable_extension(self, extension=None):
self.set_operation(WALAEventOperation.Disable)
man = self.load_manifest()
disable_cmd = man.get_disable_command()
self.logger.info("Disable extension: [{0}]".format(disable_cmd))
self.launch_command(disable_cmd, cmd_name="disable", timeout=900,
extension_error_code=ExtensionErrorCodes.PluginDisableProcessingFailed,
extension=extension)
def disable(self, extension=None, ignore_error=False):
try:
self._disable_extension(extension)
except ExtensionError as error:
if not ignore_error:
raise
msg = "[Ignored Error] Ran into error disabling extension:{0}".format(ustr(error))
self.logger.info(msg)
self.report_event(name=self.get_extension_full_name(extension), message=msg, is_success=False,
log_event=False)
# Clean extension state for Multi Config extensions on Disable
if self.should_perform_multi_config_op(extension):
self.__remove_extension_state_files(extension)
# For Single config, don't check enabled_extensions because no extension state is maintained.
# For MultiConfig, set the handler state to Installed only when all extensions have been disabled
if not self.supports_multi_config or not any(self.enabled_extensions):
self.set_handler_state(ExtHandlerState.Installed)
self.set_handler_status(status=ExtHandlerStatusValue.not_ready, message="Plugin disabled")
def install(self, uninstall_exit_code=None, extension=None):
# For Handler level operations, extension just specifies the settings that initiated the install.
# This is needed to provide the sequence number and extension name in case the extension needs to report
# failure/status using status file.
uninstall_exit_code = str(uninstall_exit_code) if uninstall_exit_code is not None else NOT_RUN
env = {ExtCommandEnvVariable.UninstallReturnCode: uninstall_exit_code}
man = self.load_manifest()
install_cmd = man.get_install_command()
self.logger.info("Install extension [{0}]".format(install_cmd))
self.set_operation(WALAEventOperation.Install)
self.launch_command(install_cmd, cmd_name="install", timeout=900, extension=extension,
extension_error_code=ExtensionErrorCodes.PluginInstallProcessingFailed, env=env)
self.set_handler_state(ExtHandlerState.Installed)
self.set_handler_status(status=ExtHandlerStatusValue.not_ready, message="Plugin installed but not enabled")
def uninstall(self, extension=None):
# For Handler level operations, extension just specifies the settings that initiated the uninstall.
# This is needed to provide the sequence number and extension name in case the extension needs to report
# failure/status using status file.
self.set_operation(WALAEventOperation.UnInstall)
man = self.load_manifest()
# stop tracking extension services cgroup.
resource_limits = man.get_resource_limits(self.get_full_name(), self.ext_handler.version)
CGroupConfigurator.get_instance().stop_tracking_extension_services_cgroups(
resource_limits.get_service_list())
CGroupConfigurator.get_instance().remove_extension_services_drop_in_files(
resource_limits.get_service_list())
uninstall_cmd = man.get_uninstall_command()
self.logger.info("Uninstall extension [{0}]".format(uninstall_cmd))
self.launch_command(uninstall_cmd, cmd_name="uninstall", extension=extension)
def remove_ext_handler(self):
try:
zip_filename = os.path.join(conf.get_lib_dir(), self.get_extension_package_zipfile_name())
if os.path.exists(zip_filename):
os.remove(zip_filename)
self.logger.verbose("Deleted the extension zip at path {0}", zip_filename)
base_dir = self.get_base_dir()
if os.path.isdir(base_dir):
self.logger.info("Remove extension handler directory: {0}", base_dir)
# some extensions uninstall asynchronously so ignore error 2 while removing them
def on_rmtree_error(_, __, exc_info):
_, exception, _ = exc_info
if not isinstance(exception, OSError) or exception.errno != 2: # [Errno 2] No such file or directory
raise exception
shutil.rmtree(base_dir, onerror=on_rmtree_error)
self.logger.info("Remove the extension slice: {0}".format(self.get_full_name()))
CGroupConfigurator.get_instance().remove_extension_slice(
extension_name=self.get_full_name())
except IOError as e:
message = "Failed to remove extension handler directory: {0}".format(e)
self.report_event(message=message, is_success=False)
self.logger.warn(message)
def update(self, handler_version=None, disable_exit_codes=None, updating_from_version=None, extension=None):
# For Handler level operations, extension just specifies the settings that initiated the update.
# This is needed to provide the sequence number and extension name in case the extension needs to report
# failure/status using status file.
if handler_version is None:
handler_version = self.ext_handler.version
env = {
'VERSION': handler_version,
ExtCommandEnvVariable.UpdatingFromVersion: updating_from_version
}
if not self.supports_multi_config:
# For single config, extension.name == ext_handler.name
env[ExtCommandEnvVariable.DisableReturnCode] = ustr(disable_exit_codes.get(self.ext_handler.name))
else:
disable_codes = []
for ext in self.extensions:
disable_codes.append({
"extensionName": ext.name,
"exitCode": ustr(disable_exit_codes.get(ext.name))
})
env[ExtCommandEnvVariable.DisableReturnCodeMultipleExtensions] = json.dumps(disable_codes)
try:
self.set_operation(WALAEventOperation.Update)
man = self.load_manifest()
update_cmd = man.get_update_command()
self.logger.info("Update extension [{0}]".format(update_cmd))
self.launch_command(update_cmd, cmd_name="update",
timeout=900,
extension_error_code=ExtensionErrorCodes.PluginUpdateProcessingFailed,
env=env, extension=extension)
except ExtensionError:
# Mark the handler as Failed so we don't clean it up and can keep reporting its status
self.set_handler_state(ExtHandlerState.FailedUpgrade)
raise
def update_with_install(self, uninstall_exit_code=None, extension=None):
man = self.load_manifest()
if man.is_update_with_install():
self.install(uninstall_exit_code=uninstall_exit_code, extension=extension)
else:
self.logger.info("UpdateWithInstall not set. "
"Skip install during upgrade.")
self.set_handler_state(ExtHandlerState.Installed)
def _get_last_modified_seq_no_from_config_files(self, extension):
"""
The sequence number is not guaranteed to always be strictly increasing. To ensure we always get the latest one,
we fetch the sequence number from the config file that was last modified (not necessarily the largest).
:return: Last modified Sequence number or -1 on errors
"""
seq_no = -1
if self.supports_multi_config and (extension is None or extension.name is None):
# If no extension name is provided for Multi Config, don't try to parse any sequence number from filesystem
return seq_no
try:
largest_modified_time = 0
conf_dir = self.get_conf_dir()
for item in os.listdir(conf_dir):
item_path = os.path.join(conf_dir, item)
if not os.path.isfile(item_path):
continue
try:
# Settings files for Multi Config look like - <ext_name>.<seq_no>.settings
# Settings files for Single Config look like - <seq_no>.settings
match = re.search("((?P<ext_name>\\w+)\\.)*(?P<seq_no>\\d+)\\.settings", item_path)
if match is not None:
ext_name = match.group('ext_name')
if self.supports_multi_config and extension.name != ext_name:
continue
curr_seq_no = int(match.group("seq_no"))
curr_modified_time = os.path.getmtime(item_path)
if curr_modified_time > largest_modified_time:
seq_no = curr_seq_no
largest_modified_time = curr_modified_time
except (ValueError, IndexError, TypeError):
self.logger.verbose("Failed to parse file name: {0}", item)
continue
except Exception as error:
logger.verbose("Error fetching sequence number from config files: {0}".format(ustr(error)))
seq_no = -1
return seq_no
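# Illustrative sketch (hypothetical files): with config files "3.settings"
# (mtime 100) and "2.settings" (mtime 200), this returns 2 -- the most
# recently modified sequence number, not the largest one.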
def get_status_file_path(self, extension=None):
"""
We should technically only fetch the sequence number from GoalState and not rely on the filesystem at all,
but there are certain scenarios where we need to fetch the latest sequence number from the filesystem
(For example when we need to report the status for extensions of previous GS if the current GS is Unsupported).
Always prioritizing sequence number from extensions but falling back to filesystem
:param extension: Extension for which the sequence number is required
:return: Sequence number for the extension, Status file path or -1, None
"""
path = None
seq_no = None
if extension is not None and extension.sequenceNumber is not None:
try:
seq_no = int(extension.sequenceNumber)
except ValueError:
logger.error('Sequence number [{0}] does not appear to be valid'.format(extension.sequenceNumber))
if seq_no is None:
# If we're unable to fetch Sequence number from Extension for any reason,
# try fetching it from the last modified Settings file.
seq_no = self._get_last_modified_seq_no_from_config_files(extension)
if seq_no is not None and seq_no > -1:
if self.should_perform_multi_config_op(extension) and extension is not None and extension.name is not None:
path = os.path.join(self.get_status_dir(), "{0}.{1}.status".format(extension.name, seq_no))
elif not self.supports_multi_config:
path = os.path.join(self.get_status_dir(), "{0}.status").format(seq_no)
return seq_no if seq_no is not None else -1, path
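# Illustrative example (hypothetical values): extension.sequenceNumber == "5"
# yields (5, "<status_dir>/5.status") for a Single Config handler; a
# non-numeric sequenceNumber falls back to the filesystem scan above and may
# yield (-1, None) if no settings file can be parsed.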
def collect_ext_status(self, ext):
self.logger.verbose("Collect extension status for {0}".format(self.get_extension_full_name(ext)))
seq_no, ext_status_file = self.get_status_file_path(ext)
# We should never try to read any status file if the handler has no settings, returning None in that case
if seq_no == -1 or ext is None:
return None
data = None
data_str = None
# Extension.name contains the extension name in case of MC and Handler name in case of Single Config.
ext_status = ExtensionStatus(name=ext.name, seq_no=seq_no)
try:
data_str, data = self._read_status_file(ext_status_file)
except ExtensionStatusError as e:
msg = ""
ext_status.status = ExtensionStatusValue.error
if e.code == ExtensionStatusError.CouldNotReadStatusFile:
ext_status.code = ExtensionErrorCodes.PluginUnknownFailure
msg = u"We couldn't read any status for {0} extension, for the sequence number {1}. It failed due" \
u" to {2}".format(self.get_full_name(ext), seq_no, ustr(e))
elif e.code == ExtensionStatusError.InvalidJsonFile:
ext_status.code = ExtensionErrorCodes.PluginSettingsStatusInvalid
msg = u"The status reported by the extension {0}(Sequence number {1}), was in an " \
u"incorrect format and the agent could not parse it correctly. Failed due to {2}" \
.format(self.get_full_name(ext), seq_no, ustr(e))
elif e.code == ExtensionStatusError.FileNotExists:
msg = "This status is being reported by the Guest Agent since no status file was " \
"reported by extension {0}: {1}".format(self.get_extension_full_name(ext), ustr(e))
# Reporting a success code and transitioning status to stay consistent with the existing code that
# creates the default status placeholder file
ext_status.code = ExtensionErrorCodes.PluginSuccess
ext_status.status = ExtensionStatusValue.transitioning
# This log is periodic due to the verbose nature of the status check. Please make sure that the message
# constructed above does not change very frequently and includes important info such as the sequence number
# and extension name, so that the log reflects changes in the extension sequence for which the
# status is being sent.
logger.periodic_warn(logger.EVERY_HALF_HOUR, u"[PERIODIC] " + msg)
add_periodic(delta=logger.EVERY_HALF_HOUR, name=self.get_extension_full_name(ext),
version=self.ext_handler.version,
op=WALAEventOperation.StatusProcessing, is_success=False, message=msg,
log_event=False)
ext_status.message = msg
return ext_status
# We did not encounter InvalidJsonFile/CouldNotReadStatusFile and thus the status file was correctly written
# and has valid json.
try:
parse_ext_status(ext_status, data)
if len(data_str) > _MAX_STATUS_FILE_SIZE_IN_BYTES:
raise ExtensionStatusError(msg="For Extension Handler {0} for the sequence number {1}, the status "
"file {2} of size {3} bytes is too big. Max Limit allowed is {4} bytes"
.format(self.get_full_name(ext), seq_no,
ext_status_file, len(data_str), _MAX_STATUS_FILE_SIZE_IN_BYTES),
code=ExtensionStatusError.MaxSizeExceeded)
except ExtensionStatusError as e:
msg = u"For Extension Handler {0} for the sequence number {1}, the status file {2}. " \
u"Encountered the following error: {3}".format(self.get_full_name(ext), seq_no,
ext_status_file, ustr(e))
logger.periodic_warn(logger.EVERY_DAY, u"[PERIODIC] " + msg)
add_periodic(delta=logger.EVERY_HALF_HOUR, name=self.get_extension_full_name(ext),
version=self.ext_handler.version,
op=WALAEventOperation.StatusProcessing, is_success=False, message=msg, log_event=False)
if e.code == ExtensionStatusError.MaxSizeExceeded:
ext_status.message, field_size = self._truncate_message(ext_status.message, _MAX_STATUS_MESSAGE_LENGTH)
ext_status.substatusList = self._process_substatus_list(ext_status.substatusList, field_size)
elif e.code == ExtensionStatusError.StatusFileMalformed:
ext_status.message = "Could not get a valid status from the extension {0}. Encountered the " \
"following error: {1}".format(self.get_full_name(ext), ustr(e))
ext_status.code = ExtensionErrorCodes.PluginSettingsStatusInvalid
ext_status.status = ExtensionStatusValue.error
return ext_status
def get_ext_handling_status(self, ext):
seq_no, ext_status_file = self.get_status_file_path(ext)
# This is a legacy scenario for cases when no extension settings are available
if seq_no < 0 or ext_status_file is None:
return None
# A missing status file is considered a non-terminal state here
# so that extension sequencing can wait until the file exists
if not os.path.exists(ext_status_file):
status = ExtensionStatusValue.warning
else:
ext_status = self.collect_ext_status(ext)
status = ext_status.status if ext_status is not None else None
return status
def is_ext_handling_complete(self, ext):
status = self.get_ext_handling_status(ext)
# when seq < 0 (i.e., no new user settings), the handling is complete; return a None status
if status is None:
return True, None
# If not in terminal state, it is incomplete
if status not in _EXTENSION_TERMINAL_STATUSES:
return False, status
# Extension completed, return its status
return True, status
def report_error_on_incarnation_change(self, goal_state_changed, log_msg, event_msg, extension=None,
op=WALAEventOperation.ReportStatus):
# Since this code is called in a loop, log as a warning only on goal state change; otherwise log
# as verbose
if goal_state_changed:
logger.warn(log_msg)
add_event(name=self.get_extension_full_name(extension), version=self.ext_handler.version,
op=op, message=event_msg, is_success=False, log_event=False)
else:
logger.verbose(log_msg)
def get_extension_handler_statuses(self, handler_status, goal_state_changed):
"""
Get the list of ExtHandlerStatus objects corresponding to each extension in the Handler. Each object might have
its own status for the Extension, but the Handler status would be the same for each extension in a Handler
:return: List of ExtHandlerStatus objects for each extension in the Handler
"""
ext_handler_statuses = []
# TODO Refactor or remove this common code pattern (for each extension subordinate to an ext_handler, do X).
for ext in self.extensions:
# In MC, we don't need to report status for disabled extensions. Skip reporting if disabled and state == disabled
# Extension.state corresponds to the state requested by CRP; self.get_extension_state() corresponds to the
# state of the extension on the VM. Skip reporting only if both are Disabled
if self.should_perform_multi_config_op(ext) and \
ext.state == ExtensionState.Disabled and self.get_extension_state(ext) == ExtensionState.Disabled:
continue
# Breaking extension reporting into 2 parts: one that is Handler dependent and the other that is Extension dependent
try:
ext_handler_status = ExtHandlerStatus()
set_properties("ExtHandlerStatus", ext_handler_status, get_properties(handler_status))
except Exception as error:
msg = "Something went wrong when trying to get a copy of the Handler status for {0}".format(
self.get_extension_full_name())
self.report_error_on_incarnation_change(goal_state_changed, event_msg=msg,
log_msg="{0}.\nStack Trace: {1}".format(
msg, textutil.format_exception(error)))
# Since this is a Handler level error and we need to do it per extension, breaking here and logging the
# error since we won't be able to report the error anyway, and saving it as a handler status (legacy behavior)
self.set_handler_status(message=msg, code=-1)
break
# For the extension dependent stuff, if there's some unhandled error, we will report it back to CRP as an extension error.
try:
ext_status = self.collect_ext_status(ext)
if ext_status is not None:
ext_handler_status.extension_status = ext_status
ext_handler_statuses.append(ext_handler_status)
except ExtensionError as error:
msg = "Unknown error when trying to fetch status from extension {0}".format(
self.get_extension_full_name(ext))
self.report_error_on_incarnation_change(goal_state_changed, event_msg=msg,
log_msg="{0}.\nStack Trace: {1}".format(
msg, textutil.format_exception(error)),
extension=ext)
# Unexpected error, for single config, keep the behavior as is
if not self.should_perform_multi_config_op(ext):
self.set_handler_status(message=ustr(error), code=error.code)
break
# For MultiConfig, create a custom ExtensionStatus object with the error details and attach it to the Handler.
# This way the error would be reported back to CRP and the failure would be propagated instantly as compared to CRP eventually timing it out.
ext_status = ExtensionStatus(name=ext.name, seq_no=ext.sequenceNumber,
code=ExtensionErrorCodes.PluginUnknownFailure,
status=ExtensionStatusValue.error, message=msg)
ext_handler_status.extension_status = ext_status
ext_handler_statuses.append(ext_handler_status)
return ext_handler_statuses
def collect_heartbeat(self): # pylint: disable=R1710
man = self.load_manifest()
if not man.is_report_heartbeat():
return
heartbeat_file = os.path.join(conf.get_lib_dir(),
self.get_heartbeat_file())
if not os.path.isfile(heartbeat_file):
raise ExtensionError("Failed to get heart beat file")
if not self.is_responsive(heartbeat_file):
return {
"status": "Unresponsive",
"code": -1,
"message": "Extension heartbeat is not responsive"
}
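# The heartbeat file is expected to be a JSON array whose first element contains a "heartbeat"
# object, e.g. (illustrative contents): [{"heartbeat": {"status": "ready", "code": 0, "message": "..."}}]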
try:
heartbeat_json = fileutil.read_file(heartbeat_file)
heartbeat = json.loads(heartbeat_json)[0]['heartbeat']
except IOError as e:
raise ExtensionError("Failed to get heartbeat file:{0}".format(e))
except (ValueError, KeyError) as e:
raise ExtensionError("Malformed heartbeat file: {0}".format(e))
return heartbeat
@staticmethod
def is_responsive(heartbeat_file):
"""
Was heartbeat_file updated within the last ten (10) minutes?
:param heartbeat_file: str
:return: bool
"""
last_update = int(time.time() - os.stat(heartbeat_file).st_mtime)
return last_update <= 600
def launch_command(self, cmd, cmd_name=None, timeout=300, extension_error_code=ExtensionErrorCodes.PluginProcessingError,
env=None, extension=None):
begin_utc = datetime.datetime.utcnow()
self.logger.verbose("Launch command: [{0}]", cmd)
base_dir = self.get_base_dir()
with tempfile.TemporaryFile(dir=base_dir, mode="w+b") as stdout:
with tempfile.TemporaryFile(dir=base_dir, mode="w+b") as stderr:
if env is None:
env = {}
# Always add Extension Path and version to the current launch_command (requested by publishers)
env.update({
ExtCommandEnvVariable.ExtensionPath: base_dir,
ExtCommandEnvVariable.ExtensionVersion: str(self.ext_handler.version),
ExtCommandEnvVariable.WireProtocolAddress: self.protocol.get_endpoint(),
# Set the sequence number to 0 in case no settings are provided, consistent with the empty
# 0.settings file that we create for such extensions.
ExtCommandEnvVariable.ExtensionSeqNumber: str(
extension.sequenceNumber) if extension is not None else _DEFAULT_SEQ_NO
})
if self.should_perform_multi_config_op(extension):
env[ExtCommandEnvVariable.ExtensionName] = extension.name
supported_features = []
for _, feature in get_agent_supported_features_list_for_extensions().items():
supported_features.append(
{
"Key": feature.name,
"Value": feature.version
}
)
if supported_features:
env[ExtCommandEnvVariable.ExtensionSupportedFeatures] = json.dumps(supported_features)
ext_name = self.get_extension_full_name(extension)
try:
# Some extensions erroneously begin cmd with a slash; don't interpret those
# as root-relative. (Issue #1170)
command_full_path = os.path.join(base_dir, cmd.lstrip(os.path.sep))
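# e.g. (hypothetical), cmd "/enable.sh" resolves to "<base_dir>/enable.sh" rather than a
# path relative to the filesystem root.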
log_msg = "Executing command: {0} with environment variables: {1}".format(command_full_path,
json.dumps(env))
self.logger.info(log_msg)
self.report_event(name=ext_name, message=log_msg, log_event=False)
# Add the os environment variables before executing command
env.update(os.environ)
process_output = CGroupConfigurator.get_instance().start_extension_command(
extension_name=self.get_full_name(extension),
command=command_full_path,
cmd_name=cmd_name,
timeout=timeout,
shell=True,
cwd=base_dir,
env=env,
stdout=stdout,
stderr=stderr,
error_code=extension_error_code)
except OSError as e:
raise ExtensionError("Failed to launch '{0}': {1}".format(command_full_path, e.strerror),
code=extension_error_code)
duration = elapsed_milliseconds(begin_utc)
log_msg = "Command: {0}\n{1}".format(cmd, "\n".join(
[line for line in process_output.split('\n') if line != ""]))
self.logger.info(log_msg)
self.report_event(name=ext_name, message=log_msg, duration=duration, log_event=False)
return process_output
def load_manifest(self):
man_file = self.get_manifest_file()
try:
data = json.loads(fileutil.read_file(man_file))
except (IOError, OSError) as e:
raise ExtensionError('Failed to load manifest file ({0}): {1}'.format(man_file, e.strerror),
code=ExtensionErrorCodes.PluginHandlerManifestNotFound)
except ValueError:
raise ExtensionError('Malformed manifest file ({0}).'.format(man_file),
code=ExtensionErrorCodes.PluginHandlerManifestDeserializationError)
return HandlerManifest(data[0])
def update_settings_file(self, settings_file, settings):
settings_file = os.path.join(self.get_conf_dir(), settings_file)
try:
fileutil.write_file(settings_file, settings)
except IOError as e:
fileutil.clean_ioerror(e,
paths=[settings_file])
raise ExtensionError(u"Failed to update settings file", e)
def update_settings(self, extension):
if self.extensions is None or len(self.extensions) == 0 or extension is None:
# This is the behavior of waagent 2.0.x
# The new agent has to be consistent with the old one.
self.logger.info("Extension has no settings, write empty 0.settings")
self.update_settings_file("{0}.settings".format(_DEFAULT_SEQ_NO), "")
return
settings = {
'publicSettings': extension.publicSettings,
'protectedSettings': extension.protectedSettings,
'protectedSettingsCertThumbprint': extension.certificateThumbprint
}
ext_settings = {
"runtimeSettings": [{
"handlerSettings": settings
}]
}
# MultiConfig: change the name to <extensionName>.<seqNo>.settings for MC and <seqNo>.settings for SC
settings_file = "{0}.{1}.settings".format(extension.name, extension.sequenceNumber) if \
self.should_perform_multi_config_op(extension) else "{0}.settings".format(extension.sequenceNumber)
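# e.g. (hypothetical names): "extA.2.settings" for MC, "2.settings" for SC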
self.logger.info("Update settings file: {0}", settings_file)
self.update_settings_file(settings_file, json.dumps(ext_settings))
def create_handler_env(self):
handler_env = {
HandlerEnvironment.logFolder: self.get_log_dir(),
HandlerEnvironment.configFolder: self.get_conf_dir(),
HandlerEnvironment.statusFolder: self.get_status_dir(),
HandlerEnvironment.heartbeatFile: self.get_heartbeat_file()
}
if get_supported_feature_by_name(SupportedFeatureNames.ExtensionTelemetryPipeline).is_supported:
handler_env[HandlerEnvironment.eventsFolder] = self.get_extension_events_dir()
# For now, keep the preview key to not break extensions that were using the preview.
handler_env[HandlerEnvironment.eventsFolder_preview] = self.get_extension_events_dir()
env = [{
HandlerEnvironment.name: self.ext_handler.name,
HandlerEnvironment.version: HandlerEnvironment.schemaVersion,
HandlerEnvironment.handlerEnvironment: handler_env
}]
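# Illustrative resulting HandlerEnvironment.json (handler name and paths are hypothetical):
# [{
#     "name": "ExampleHandler",
#     "version": 1.0,
#     "handlerEnvironment": {
#         "logFolder": "/var/log/azure/ExampleHandler",
#         "configFolder": "/var/lib/waagent/ExampleHandler-1.0.0/config",
#         "statusFolder": "/var/lib/waagent/ExampleHandler-1.0.0/status",
#         "heartbeatFile": "/var/lib/waagent/ExampleHandler-1.0.0/heartbeat.log"
#     }
# }]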
try:
fileutil.write_file(self.get_env_file(), json.dumps(env))
except IOError as e:
fileutil.clean_ioerror(e,
paths=[self.get_base_dir(), self.pkg_file])
raise ExtensionDownloadError(u"Failed to save handler environment", e)
def __get_handler_state_file_name(self, extension=None):
if self.should_perform_multi_config_op(extension):
return "{0}.HandlerState".format(extension.name)
return "HandlerState"
def set_handler_state(self, handler_state):
self.__set_state(name=self.__get_handler_state_file_name(), value=handler_state)
def get_handler_state(self):
return self.__get_state(name=self.__get_handler_state_file_name(), default=ExtHandlerState.NotInstalled)
def __set_extension_state(self, extension, extension_state):
self.__set_state(name=self.__get_handler_state_file_name(extension), value=extension_state)
def get_extension_state(self, extension=None):
return self.__get_state(name=self.__get_handler_state_file_name(extension), default=ExtensionState.Disabled)
def __set_state(self, name, value):
state_dir = self.get_conf_dir()
state_file = os.path.join(state_dir, name)
try:
if not os.path.exists(state_dir):
fileutil.mkdir(state_dir, mode=0o700)
fileutil.write_file(state_file, value)
except IOError as e:
fileutil.clean_ioerror(e, paths=[state_file])
self.logger.error("Failed to set state: {0}", e)
def __get_state(self, name, default=None):
state_dir = self.get_conf_dir()
state_file = os.path.join(state_dir, name)
if not os.path.isfile(state_file):
return default
try:
return fileutil.read_file(state_file)
except IOError as e:
self.logger.error("Failed to get state: {0}", e)
return default
def __remove_extension_state_files(self, extension):
self.logger.info("Removing states files for disabled extension: {0}".format(extension.name))
try:
# MultiConfig: Remove all config/<extName>.*.settings, status/<extName>.*.status and config/<extName>.HandlerState files
files_to_delete = [
os.path.join(self.get_conf_dir(), "{0}.*.settings".format(extension.name)),
os.path.join(self.get_status_dir(), "{0}.*.status".format(extension.name)),
os.path.join(self.get_conf_dir(), self.__get_handler_state_file_name(extension))
]
fileutil.rm_files(*files_to_delete)
except Exception as error:
extension_name = self.get_extension_full_name(extension)
message = "Failed to remove extension state files for {0}: {1}".format(extension_name, ustr(error))
self.report_event(name=extension_name, message=message, is_success=False, log_event=False)
self.logger.warn(message)
def set_handler_status(self, status=ExtHandlerStatusValue.not_ready, message="", code=0):
state_dir = self.get_conf_dir()
handler_status = ExtHandlerStatus()
handler_status.name = self.ext_handler.name
handler_status.version = str(self.ext_handler.version)
handler_status.message = message
handler_status.code = code
handler_status.status = status
handler_status.supports_multi_config = self.ext_handler.supports_multi_config
status_file = os.path.join(state_dir, "HandlerStatus")
try:
handler_status_json = json.dumps(get_properties(handler_status))
if handler_status_json is not None:
if not os.path.exists(state_dir):
fileutil.mkdir(state_dir, mode=0o700)
fileutil.write_file(status_file, handler_status_json)
else:
self.logger.error("Failed to create JSON document of handler status for {0} version {1}".format(
self.ext_handler.name, self.ext_handler.version))
except (IOError, ValueError, ProtocolError) as error:
fileutil.clean_ioerror(error, paths=[status_file])
self.logger.error("Failed to save handler status: {0}", textutil.format_exception(error))
def get_handler_status(self):
state_dir = self.get_conf_dir()
status_file = os.path.join(state_dir, "HandlerStatus")
if not os.path.isfile(status_file):
return None
handler_status_contents = ""
try:
handler_status_contents = fileutil.read_file(status_file)
data = json.loads(handler_status_contents)
handler_status = ExtHandlerStatus()
set_properties("ExtHandlerStatus", handler_status, data)
return handler_status
except (IOError, ValueError) as error:
self.logger.error("Failed to get handler status: {0}", error)
except Exception as error:
error_msg = "Failed to get handler status message: {0}.\n Contents of file: {1}".format(
ustr(error), handler_status_contents).replace('"', '\'')
add_periodic(
delta=logger.EVERY_HOUR,
name=AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.ExtensionProcessing,
is_success=False,
message=error_msg)
raise
return None
def get_extension_package_zipfile_name(self):
return "{0}__{1}{2}".format(self.ext_handler.name,
self.ext_handler.version,
HANDLER_PKG_EXT)
def get_full_name(self, extension=None):
"""
:return: <HandlerName>-<HandlerVersion> if extension is None or the Handler does not support Multi Config,
else return <HandlerName>.<ExtensionName>-<HandlerVersion>
"""
return "{0}-{1}".format(self.get_extension_full_name(extension), self.ext_handler.version)
def get_base_dir(self):
return os.path.join(conf.get_lib_dir(), self.get_full_name())
def get_status_dir(self):
return os.path.join(self.get_base_dir(), "status")
def get_conf_dir(self):
return os.path.join(self.get_base_dir(), 'config')
def get_extension_events_dir(self):
return os.path.join(self.get_log_dir(), EVENTS_DIRECTORY)
def get_heartbeat_file(self):
return os.path.join(self.get_base_dir(), 'heartbeat.log')
def get_manifest_file(self):
return os.path.join(self.get_base_dir(), 'HandlerManifest.json')
def get_env_file(self):
return os.path.join(self.get_base_dir(), HandlerEnvironment.fileName)
def get_log_dir(self):
return os.path.join(conf.get_ext_log_dir(), self.ext_handler.name)
@staticmethod
def is_azuremonitorlinuxagent(extension_name):
cgroup_monitor_extension_name = conf.get_cgroup_monitor_extension_name()
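# A hypothetical example: if the configured monitor extension name were
# "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", any extension_name starting with that
# prefix would match, but only until the configured expiry date.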
if re.match(r"\A" + cgroup_monitor_extension_name, extension_name) is not None\
and datetime.datetime.utcnow() < datetime.datetime.strptime(conf.get_cgroup_monitor_expiry_time(), "%Y-%m-%d"):
return True
return False
@staticmethod
def _read_status_file(ext_status_file):
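# Reads and parses the status file, retrying up to _NUM_OF_STATUS_FILE_RETRIES times and
# sleeping _STATUS_FILE_RETRY_DELAY seconds between attempts; the last failure is re-raised.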
err_count = 0
while True:
try:
return ExtHandlerInstance._read_and_parse_json_status_file(ext_status_file)
except Exception:
err_count += 1
if err_count >= _NUM_OF_STATUS_FILE_RETRIES:
raise
time.sleep(_STATUS_FILE_RETRY_DELAY)
@staticmethod
def _read_and_parse_json_status_file(ext_status_file):
if not os.path.exists(ext_status_file):
raise ExtensionStatusError(msg="Status file {0} does not exist".format(ext_status_file),
code=ExtensionStatusError.FileNotExists)
try:
data_str = fileutil.read_file(ext_status_file)
except IOError as e:
raise ExtensionStatusError(msg=ustr(e), inner=e,
code=ExtensionStatusError.CouldNotReadStatusFile)
try:
data = json.loads(data_str)
except (ValueError, TypeError) as e:
raise ExtensionStatusError(msg="{0} \n First 2000 Bytes of status file:\n {1}".format(ustr(e), ustr(data_str)[:2000]),
inner=e,
code=ExtensionStatusError.InvalidJsonFile)
return data_str, data
def _process_substatus_list(self, substatus_list, current_status_size=0):
processed_substatus = []
# Truncate each substatus name and message to reduce the size, preserving the other fields;
# stop adding substatuses once the size limit is reached
for substatus in substatus_list:
substatus.name, field_size = self._truncate_message(substatus.name, _MAX_SUBSTATUS_FIELD_LENGTH)
current_status_size += field_size
substatus.message, field_size = self._truncate_message(substatus.message, _MAX_SUBSTATUS_FIELD_LENGTH)
current_status_size += field_size
if current_status_size <= _MAX_STATUS_FILE_SIZE_IN_BYTES:
processed_substatus.append(substatus)
else:
break
return processed_substatus
@staticmethod
def _truncate_message(field, truncate_size=_MAX_SUBSTATUS_FIELD_LENGTH): # pylint: disable=R1710
if field is None: # pylint: disable=R1705
return
else:
truncated_field = field if len(field) < truncate_size else field[:truncate_size] + _TRUNCATED_SUFFIX
return truncated_field, len(truncated_field)
class HandlerEnvironment(object):
# HandlerEnvironment.json schema version
schemaVersion = 1.0
fileName = "HandlerEnvironment.json"
handlerEnvironment = "handlerEnvironment"
logFolder = "logFolder"
configFolder = "configFolder"
statusFolder = "statusFolder"
heartbeatFile = "heartbeatFile"
eventsFolder_preview = "eventsFolder_preview"
eventsFolder = "eventsFolder"
name = "name"
version = "version"
class HandlerManifest(object):
def __init__(self, data):
if data is None or data['handlerManifest'] is None:
raise ExtensionError('Malformed manifest file.')
self.data = data
def get_name(self):
return self.data["name"]
def get_version(self):
return self.data["version"]
def get_install_command(self):
return self.data['handlerManifest']["installCommand"]
def get_uninstall_command(self):
return self.data['handlerManifest']["uninstallCommand"]
def get_update_command(self):
return self.data['handlerManifest']["updateCommand"]
def get_enable_command(self):
return self.data['handlerManifest']["enableCommand"]
def get_disable_command(self):
return self.data['handlerManifest']["disableCommand"]
def is_report_heartbeat(self):
return self.data['handlerManifest'].get('reportHeartbeat', False)
def is_update_with_install(self):
update_mode = self.data['handlerManifest'].get('updateMode')
if update_mode is None:
return True
return update_mode.lower() == "updatewithinstall"
def is_continue_on_update_failure(self):
return self.data['handlerManifest'].get('continueOnUpdateFailure', False)
def supports_multiple_extensions(self):
return self.data['handlerManifest'].get('supportsMultipleExtensions', False)
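# Illustrative (hypothetical) HandlerManifest.json consumed by this class; load_manifest()
# passes in the first element of the array:
# [{
#     "name": "ExampleHandler",
#     "version": 1.0,
#     "handlerManifest": {
#         "installCommand": "scripts/install.sh",
#         "uninstallCommand": "scripts/uninstall.sh",
#         "updateCommand": "scripts/update.sh",
#         "enableCommand": "scripts/enable.sh",
#         "disableCommand": "scripts/disable.sh",
#         "reportHeartbeat": false,
#         "continueOnUpdateFailure": false
#     }
# }]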
def get_resource_limits(self, extension_name, str_version):
"""
Placeholder values for testing and monitoring the monitor extension's resource usage.
This is not effective after Nov 30th.
"""
if ExtHandlerInstance.is_azuremonitorlinuxagent(extension_name):
if LooseVersion(str_version) < LooseVersion("1.12"):
test_man = {
"resourceLimits": {
"services": [
{
"name": "mdsd.service"
}
]
}
}
return ResourceLimits(test_man.get('resourceLimits', None))
else:
test_man = {
"resourceLimits": {
"services": [
{
"name": "azuremonitoragent.service"
}
]
}
}
return ResourceLimits(test_man.get('resourceLimits', None))
return ResourceLimits(self.data.get('resourceLimits', None))
class ResourceLimits(object):
def __init__(self, data):
self.data = data
def get_extension_slice_cpu_quota(self):
if self.data is not None:
return self.data.get('cpuQuotaPercentage', None)
return None
def get_extension_slice_memory_quota(self):
if self.data is not None:
return self.data.get('memoryQuotaInMB', None)
return None
def get_service_list(self):
if self.data is not None:
return self.data.get('services', None)
return None
class ExtensionStatusError(ExtensionError):
"""
When extension failed to provide a valid status file
"""
CouldNotReadStatusFile = 1
InvalidJsonFile = 2
StatusFileMalformed = 3
MaxSizeExceeded = 4
FileNotExists = 5
def __init__(self, msg=None, inner=None, code=-1): # pylint: disable=W0235
super(ExtensionStatusError, self).__init__(msg, inner, code)
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/ga_version_updater.py 0000664 0000000 0000000 00000017531 14626177470 0025605 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import glob
import os
import shutil
from azurelinuxagent.common import conf, logger
from azurelinuxagent.common.exception import AgentUpdateError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_VERSION
from azurelinuxagent.ga.guestagent import GuestAgent, AGENT_MANIFEST_FILE
class GAVersionUpdater(object):
def __init__(self, gs_id):
self._gs_id = gs_id
self._version = FlexibleVersion("0.0.0.0") # Initialize to zero and retrieve from goal state later stage
self._agent_manifest = None # Initialize to None and fetch from goal state at different stage for different updater
def is_update_allowed_this_time(self, ext_gs_updated):
"""
This function checks if we are allowed to update the agent.
@param ext_gs_updated: True if extension goal state updated else False
@return: False when we don't allow updates.
"""
raise NotImplementedError
def is_rsm_update_enabled(self, agent_family, ext_gs_updated):
"""
Returns True if we need to switch to RSM updates from self-updates, and vice versa.
@param agent_family: agent family
@param ext_gs_updated: True if extension goal state updated else False
@return: False when the agent needs to stop RSM updates,
True when the agent needs to switch to RSM updates
"""
raise NotImplementedError
def retrieve_agent_version(self, agent_family, goal_state):
"""
This function fetches the agent version from the goal state for the given family.
@param agent_family: agent family
@param goal_state: goal state
"""
raise NotImplementedError
def is_retrieved_version_allowed_to_update(self, agent_family):
"""
Checks all base conditions to determine whether the new version is allowed to update.
@param agent_family: agent family
@return: True if allowed to update else False
"""
raise NotImplementedError
def log_new_agent_update_message(self):
"""
This function logs the update message after we check that the agent is allowed to update.
"""
raise NotImplementedError
def proceed_with_update(self):
"""
Performs the upgrade/downgrade.
@raise: AgentUpgradeExitException
"""
raise NotImplementedError
@property
def version(self):
"""
Return version
"""
return self._version
def sync_new_gs_id(self, gs_id):
"""
Update gs_id
@param gs_id: goal state id
"""
self._gs_id = gs_id
@staticmethod
def download_new_agent_pkg(package_to_download, protocol, is_fast_track_goal_state):
"""
Function downloads the new agent.
@param package_to_download: package to download
@param protocol: protocol object
@param is_fast_track_goal_state: True if goal state is fast track else False
"""
agent_name = "{0}-{1}".format(AGENT_NAME, package_to_download.version)
agent_dir = os.path.join(conf.get_lib_dir(), agent_name)
agent_pkg_path = ".".join((os.path.join(conf.get_lib_dir(), agent_name), "zip"))
agent_handler_manifest_file = os.path.join(agent_dir, AGENT_MANIFEST_FILE)
if not os.path.exists(agent_dir) or not os.path.isfile(agent_handler_manifest_file):
protocol.client.download_zip_package("agent package", package_to_download.uris, agent_pkg_path, agent_dir, use_verify_header=is_fast_track_goal_state)
else:
logger.info("Agent {0} was previously downloaded - skipping download", agent_name)
if not os.path.isfile(agent_handler_manifest_file):
try:
# Clean up the agent directory if the manifest file is missing
logger.info("Agent handler manifest file is missing, cleaning up the agent directory: {0}".format(agent_dir))
if os.path.isdir(agent_dir):
shutil.rmtree(agent_dir, ignore_errors=True)
except Exception as err:
logger.warn("Unable to delete Agent directory: {0}".format(err))
raise AgentUpdateError("Downloaded agent package: {0} is missing agent handler manifest file: {1}".format(agent_name, agent_handler_manifest_file))
def download_and_get_new_agent(self, protocol, agent_family, goal_state):
"""
Function downloads the new agent and returns the downloaded version.
@param protocol: protocol object
@param agent_family: agent family
@param goal_state: goal state
@return: GuestAgent: downloaded agent
"""
if self._agent_manifest is None: # Fetch agent manifest if it's not already done
self._agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris)
package_to_download = self._get_agent_package_to_download(self._agent_manifest, self._version)
is_fast_track_goal_state = goal_state.extensions_goal_state.source == GoalStateSource.FastTrack
self.download_new_agent_pkg(package_to_download, protocol, is_fast_track_goal_state)
agent = GuestAgent.from_agent_package(package_to_download)
return agent
def purge_extra_agents_from_disk(self):
"""
Remove the agents from disk except current version and new agent version
"""
known_agents = [CURRENT_VERSION, self._version]
self._purge_unknown_agents_from_disk(known_agents)
def _get_agent_package_to_download(self, agent_manifest, version):
"""
Returns the package of the given version found in the manifest. If not found, raises an exception
"""
for pkg in agent_manifest.pkg_list.versions:
if FlexibleVersion(pkg.version) == version:
# Found a matching package, only download that one
return pkg
raise AgentUpdateError("No matching package found in the agent manifest for version: {0} in goal state incarnation: {1}, "
"skipping agent update".format(str(version), self._gs_id))
@staticmethod
def _purge_unknown_agents_from_disk(known_agents):
"""
Remove from disk all directories and .zip files of unknown agents
"""
path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
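# e.g. (hypothetical version), the glob matches both the "WALinuxAgent-9.9.9.9" directory and
# the "WALinuxAgent-9.9.9.9.zip" package; anything whose version is not in known_agents is removed.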
for agent_path in glob.iglob(path):
try:
name = fileutil.trim_ext(agent_path, "zip")
m = AGENT_DIR_PATTERN.match(name)
if m is not None and FlexibleVersion(m.group(1)) not in known_agents:
if os.path.isfile(agent_path):
logger.info(u"Purging outdated Agent file {0}", agent_path)
os.remove(agent_path)
else:
logger.info(u"Purging outdated Agent directory {0}", agent_path)
shutil.rmtree(agent_path)
except Exception as e:
logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e))
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/guestagent.py 0000664 0000000 0000000 00000027441 14626177470 0024074 0 ustar 00root root 0000000 0000000 import json
import os
import shutil
import time
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common import logger, conf
from azurelinuxagent.common.exception import UpdateError
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import AGENT_DIR_PATTERN, AGENT_NAME
from azurelinuxagent.ga.exthandlers import HandlerManifest
AGENT_ERROR_FILE = "error.json" # File name for agent error record
AGENT_MANIFEST_FILE = "HandlerManifest.json"
MAX_FAILURE = 3 # Max failure allowed for agent before declare bad agent
AGENT_UPDATE_COUNT_FILE = "update_attempt.json" # File for tracking agent update attempt count
class GuestAgent(object):
def __init__(self, path, pkg):
"""
If 'path' is given, the object is initialized to the version installed under that path.
If 'pkg' is given, the version specified in the package information is downloaded and the object is
initialized to that version.
NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly
"""
self.pkg = pkg
version = None
if path is not None:
m = AGENT_DIR_PATTERN.match(path)
if m is None:
raise UpdateError(u"Illegal agent directory: {0}".format(path))
version = m.group(1)
elif self.pkg is not None:
version = pkg.version
if version is None:
raise UpdateError(u"Illegal agent version: {0}".format(version))
self.version = FlexibleVersion(version)
location = u"disk" if path is not None else u"package"
logger.verbose(u"Loading Agent {0} from {1}", self.name, location)
self.error = GuestAgentError(self.get_agent_error_file())
self.error.load()
self.update_attempt_data = GuestAgentUpdateAttempt(self.get_agent_update_count_file())
self.update_attempt_data.load()
try:
self._ensure_loaded()
except Exception as e:
# If we're unable to unpack the agent, delete the Agent directory
try:
if os.path.isdir(self.get_agent_dir()):
shutil.rmtree(self.get_agent_dir(), ignore_errors=True)
except Exception as err:
logger.warn("Unable to delete Agent files: {0}".format(err))
msg = u"Agent {0} install failed with exception:".format(
self.name)
detailed_msg = '{0} {1}'.format(msg, textutil.format_exception(e))
add_event(
AGENT_NAME,
version=self.version,
op=WALAEventOperation.Install,
is_success=False,
message=detailed_msg)
@staticmethod
def from_installed_agent(path):
"""
Creates an instance of GuestAgent using the agent installed in the given 'path'.
"""
return GuestAgent(path, None)
@staticmethod
def from_agent_package(package):
"""
Creates an instance of GuestAgent using the information provided in the 'package'; if that version of the agent is not installed, it installs it.
"""
return GuestAgent(None, package)
@property
def name(self):
return "{0}-{1}".format(AGENT_NAME, self.version)
def get_agent_cmd(self):
return self.manifest.get_enable_command()
def get_agent_dir(self):
return os.path.join(conf.get_lib_dir(), self.name)
def get_agent_error_file(self):
return os.path.join(conf.get_lib_dir(), self.name, AGENT_ERROR_FILE)
def get_agent_update_count_file(self):
return os.path.join(conf.get_lib_dir(), self.name, AGENT_UPDATE_COUNT_FILE)
def get_agent_manifest_path(self):
return os.path.join(self.get_agent_dir(), AGENT_MANIFEST_FILE)
def get_agent_pkg_path(self):
return ".".join((os.path.join(conf.get_lib_dir(), self.name), "zip"))
def clear_error(self):
self.error.clear()
self.error.save()
@property
def is_available(self):
return self.is_downloaded and not self.is_blacklisted
@property
def is_blacklisted(self):
return self.error is not None and self.error.is_blacklisted
@property
def is_downloaded(self):
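# Note: a blacklisted agent is treated as downloaded, presumably so the update flow does not
# keep re-downloading a package that will never be used.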
return self.is_blacklisted or \
os.path.isfile(self.get_agent_manifest_path())
def mark_failure(self, is_fatal=False, reason=''):
try:
if not os.path.isdir(self.get_agent_dir()):
os.makedirs(self.get_agent_dir())
self.error.mark_failure(is_fatal=is_fatal, reason=reason)
self.error.save()
if self.error.is_blacklisted:
msg = u"Agent {0} is permanently blacklisted".format(self.name)
logger.warn(msg)
add_event(op=WALAEventOperation.AgentBlacklisted, is_success=False, message=msg, log_event=False,
version=self.version)
except Exception as e:
logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e))
def inc_update_attempt_count(self):
try:
self.update_attempt_data.inc_count()
self.update_attempt_data.save()
except Exception as e:
logger.warn(u"Agent {0} failed recording update attempt: {1}", self.name, ustr(e))
def get_update_attempt_count(self):
return self.update_attempt_data.count
def _ensure_loaded(self):
self._load_manifest()
self._load_error()
def _load_error(self):
try:
self.error = GuestAgentError(self.get_agent_error_file())
self.error.load()
logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error))
except Exception as e:
logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e))
def _load_manifest(self):
path = self.get_agent_manifest_path()
if not os.path.isfile(path):
msg = u"Agent {0} is missing the {1} file".format(self.name, AGENT_MANIFEST_FILE)
raise UpdateError(msg)
with open(path, "r") as manifest_file:
try:
manifests = json.load(manifest_file)
except Exception as e:
msg = u"Agent {0} has a malformed {1} ({2})".format(self.name, AGENT_MANIFEST_FILE, ustr(e))
raise UpdateError(msg)
if type(manifests) is list:
if len(manifests) <= 0:
msg = u"Agent {0} has an empty {1}".format(self.name, AGENT_MANIFEST_FILE)
raise UpdateError(msg)
manifest = manifests[0]
else:
manifest = manifests
try:
self.manifest = HandlerManifest(manifest) # pylint: disable=W0201
if len(self.manifest.get_enable_command()) <= 0:
raise Exception(u"Manifest is missing the enable command")
except Exception as e:
msg = u"Agent {0} has an illegal {1}: {2}".format(
self.name,
AGENT_MANIFEST_FILE,
ustr(e))
raise UpdateError(msg)
logger.verbose(
u"Agent {0} loaded manifest from {1}",
self.name,
self.get_agent_manifest_path())
logger.verbose(u"Successfully loaded Agent {0} {1}: {2}",
self.name,
AGENT_MANIFEST_FILE,
ustr(self.manifest.data))
return
class GuestAgentError(object):
def __init__(self, path):
self.last_failure = 0.0
self.was_fatal = False
if path is None:
raise UpdateError(u"GuestAgentError requires a path")
self.path = path
self.failure_count = 0
self.reason = ''
self.clear()
return
def mark_failure(self, is_fatal=False, reason=''):
self.last_failure = time.time()
self.failure_count += 1
self.was_fatal = is_fatal
self.reason = reason
return
def clear(self):
self.last_failure = 0.0
self.failure_count = 0
self.was_fatal = False
self.reason = ''
return
@property
def is_blacklisted(self):
return self.was_fatal or self.failure_count >= MAX_FAILURE
def load(self):
if self.path is not None and os.path.isfile(self.path):
try:
with open(self.path, 'r') as f:
self.from_json(json.load(f))
except Exception as error:
# The error.json file is only supposed to be written by the agent.
# If for whatever reason the file is malformed, just delete it to reset the error state.
logger.warn(
"Ran into error when trying to load error file {0}, deleting it to clean state. Error: {1}".format(
self.path, textutil.format_exception(error)))
try:
os.remove(self.path)
except Exception:
# We make a best-effort attempt to delete the file; ignore the error if we're unable to do so
pass
return
def save(self):
if os.path.isdir(os.path.dirname(self.path)):
with open(self.path, 'w') as f:
json.dump(self.to_json(), f)
return
def from_json(self, data):
self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0))
self.failure_count = max(self.failure_count, data.get(u"failure_count", 0))
self.was_fatal = self.was_fatal or data.get(u"was_fatal", False)
reason = data.get(u"reason", '')
self.reason = reason if reason != '' else self.reason
return
def to_json(self):
data = {
u"last_failure": self.last_failure,
u"failure_count": self.failure_count,
u"was_fatal": self.was_fatal,
u"reason": ustr(self.reason)
}
return data
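# Illustrative error.json contents (values are hypothetical):
# {"last_failure": 1700000000.0, "failure_count": 2, "was_fatal": false, "reason": "example reason"}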
def __str__(self):
return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format(
self.last_failure,
self.failure_count,
self.was_fatal,
self.reason)
class GuestAgentUpdateAttempt(object):
def __init__(self, path):
self.count = 0
if path is None:
raise UpdateError(u"GuestAgentUpdateAttempt requires a path")
self.path = path
self.clear()
def inc_count(self):
self.count += 1
def clear(self):
self.count = 0
def load(self):
if self.path is not None and os.path.isfile(self.path):
try:
with open(self.path, 'r') as f:
self.from_json(json.load(f))
except Exception as error:
# The update_attempt.json file is only supposed to be written by the agent.
# If for whatever reason the file is malformed, just delete it to reset the state.
logger.warn(
"Ran into error when trying to load error file {0}, deleting it to clean state. Error: {1}".format(
self.path, textutil.format_exception(error)))
try:
os.remove(self.path)
except Exception:
# We make a best-effort attempt to delete the file; ignore the error if we're unable to do so
pass
def save(self):
if os.path.isdir(os.path.dirname(self.path)):
with open(self.path, 'w') as f:
json.dump(self.to_json(), f)
def from_json(self, data):
self.count = data.get(u"count", 0)
def to_json(self):
data = {
u"count": self.count
}
return data
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/interfaces.py 0000664 0000000 0000000 00000002756 14626177470 0024053 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
class ThreadHandlerInterface(object):
"""
Interface for all thread handlers created and maintained by the GuestAgent.
"""
@staticmethod
def get_thread_name():
raise NotImplementedError("get_thread_name() not implemented")
def run(self):
raise NotImplementedError("run() not implemented")
def keep_alive(self):
"""
Returns true if the thread handler should be restarted when the thread dies
and false when it should remain dead.
Defaults to True and can be overridden by sub-classes.
"""
return True
def is_alive(self):
raise NotImplementedError("is_alive() not implemented")
def start(self):
raise NotImplementedError("start() not implemented")
def stop(self):
raise NotImplementedError("stop() not implemented") Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/logcollector.py 0000664 0000000 0000000 00000042674 14626177470 0024423 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import glob
import logging
import os
import subprocess
import time
import zipfile
from datetime import datetime
from heapq import heappush, heappop
from azurelinuxagent.common.conf import get_lib_dir, get_ext_log_dir, get_agent_log_file
from azurelinuxagent.common.event import initialize_event_logger_vminfo_common_parameters
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.logcollector_manifests import MANIFEST_NORMAL, MANIFEST_FULL
# Please note: be careful when adding agent dependencies in this module.
# This module uses its own logger and logs to its own file, not to the agent log.
from azurelinuxagent.common.protocol.goal_state import GoalStateProperties
from azurelinuxagent.common.protocol.util import get_protocol_util
_EXTENSION_LOG_DIR = get_ext_log_dir()
_AGENT_LIB_DIR = get_lib_dir()
_AGENT_LOG = get_agent_log_file()
_LOG_COLLECTOR_DIR = os.path.join(_AGENT_LIB_DIR, "logcollector")
_TRUNCATED_FILES_DIR = os.path.join(_LOG_COLLECTOR_DIR, "truncated")
OUTPUT_RESULTS_FILE_PATH = os.path.join(_LOG_COLLECTOR_DIR, "results.txt")
COMPRESSED_ARCHIVE_PATH = os.path.join(_LOG_COLLECTOR_DIR, "logs.zip")
CGROUPS_UNIT = "collect-logs.scope"
GRACEFUL_KILL_ERRCODE = 3
INVALID_CGROUPS_ERRCODE = 2
_MUST_COLLECT_FILES = [
_AGENT_LOG,
os.path.join(_AGENT_LIB_DIR, "waagent_status.json"),
os.path.join(_AGENT_LIB_DIR, "history", "*.zip"),
os.path.join(_EXTENSION_LOG_DIR, "*", "*"),
os.path.join(_EXTENSION_LOG_DIR, "*", "*", "*"),
"{0}.*".format(_AGENT_LOG) # any additional waagent.log files (e.g., waagent.log.1.gz)
]
_FILE_SIZE_LIMIT = 30 * 1024 * 1024 # 30 MB
_UNCOMPRESSED_ARCHIVE_SIZE_LIMIT = 150 * 1024 * 1024 # 150 MB
_LOGGER = logging.getLogger(__name__)
class LogCollector(object):
_TRUNCATED_FILE_PREFIX = "truncated_"
def __init__(self, is_full_mode=False):
self._is_full_mode = is_full_mode
self._manifest = MANIFEST_FULL if is_full_mode else MANIFEST_NORMAL
self._must_collect_files = self._expand_must_collect_files()
self._create_base_dirs()
self._set_logger()
self._initialize_telemetry()
@staticmethod
def _mkdir(dirname):
if not os.path.isdir(dirname):
os.makedirs(dirname)
@staticmethod
def _reset_file(filepath):
with open(filepath, "wb") as out_file:
out_file.write("".encode("utf-8"))
@staticmethod
def _create_base_dirs():
LogCollector._mkdir(_LOG_COLLECTOR_DIR)
LogCollector._mkdir(_TRUNCATED_FILES_DIR)
@staticmethod
def _set_logger():
_f_handler = logging.FileHandler(OUTPUT_RESULTS_FILE_PATH, encoding="utf-8")
_f_format = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
datefmt=u'%Y-%m-%dT%H:%M:%SZ')
_f_format.converter = time.gmtime
_f_handler.setFormatter(_f_format)
_LOGGER.addHandler(_f_handler)
_LOGGER.setLevel(logging.INFO)
@staticmethod
def _initialize_telemetry():
protocol = get_protocol_util().get_protocol(init_goal_state=False)
protocol.client.reset_goal_state(goal_state_properties=GoalStateProperties.RoleConfig | GoalStateProperties.HostingEnv)
# Initialize the common parameters for telemetry events
initialize_event_logger_vminfo_common_parameters(protocol)
@staticmethod
def _run_shell_command(command, stdout=subprocess.PIPE, log_output=False):
"""
Runs a shell command in a subprocess, logs any errors to the log file, enables changing the stdout stream,
and logs the output of the command to the log file if indicated by the `log_output` parameter.
:param command: Shell command to run
:param stdout: Where to write the output of the command
:param log_output: If true, log the command output to the log file
"""
def format_command(cmd):
return " ".join(cmd) if isinstance(cmd, list) else command
def _encode_command_output(output):
return ustr(output, encoding="utf-8", errors="backslashreplace")
try:
process = subprocess.Popen(command, stdout=stdout, stderr=subprocess.PIPE, shell=False)
stdout, stderr = process.communicate()
return_code = process.returncode
except Exception as e:
error_msg = u"Command [{0}] raised unexpected exception: [{1}]".format(format_command(command), ustr(e))
_LOGGER.error(error_msg)
return
if return_code != 0:
encoded_stdout = _encode_command_output(stdout)
encoded_stderr = _encode_command_output(stderr)
error_msg = "Command: [{0}], return code: [{1}], stdout: [{2}] stderr: [{3}]".format(format_command(command),
return_code,
encoded_stdout,
encoded_stderr)
_LOGGER.error(error_msg)
return
if log_output:
msg = "Output of command [{0}]:\n{1}".format(format_command(command), _encode_command_output(stdout))
_LOGGER.info(msg)
@staticmethod
def _expand_must_collect_files():
# Match the glob patterns from the _MUST_COLLECT_FILES list to existing file paths on disk.
manifest = []
for path in _MUST_COLLECT_FILES:
manifest.extend(sorted(glob.glob(path)))
return manifest
def _read_manifest(self):
return self._manifest.splitlines()
@staticmethod
def _process_ll_command(folder):
LogCollector._run_shell_command(["ls", "-alF", folder], log_output=True)
@staticmethod
def _process_echo_command(message):
_LOGGER.info(message)
@staticmethod
def _process_copy_command(path):
file_paths = glob.glob(path)
for file_path in file_paths:
_LOGGER.info(file_path)
return file_paths
@staticmethod
def _convert_file_name_to_archive_name(file_name):
# File name is the name of the file on disk, whereas archive name is the name of that same file in the archive.
# For non-truncated files: /var/log/waagent.log on disk becomes var/log/waagent.log in archive
# (leading separator is removed by the archive).
# For truncated files: /var/lib/waagent/logcollector/truncated/var/log/syslog.1 on disk becomes
# truncated_var_log_syslog.1 in the archive.
if file_name.startswith(_TRUNCATED_FILES_DIR):
original_file_path = file_name[len(_TRUNCATED_FILES_DIR):].lstrip(os.path.sep)
archive_file_name = LogCollector._TRUNCATED_FILE_PREFIX + original_file_path.replace(os.path.sep, "_")
return archive_file_name
else:
return file_name.lstrip(os.path.sep)
@staticmethod
def _remove_uncollected_truncated_files(files_to_collect):
# After log collection is completed, see if there are any old truncated files which were not collected
# and remove them since they probably won't be collected in the future. This is possible when the
# original file got deleted, so there is no need to keep its truncated version anymore.
truncated_files = os.listdir(_TRUNCATED_FILES_DIR)
for file_path in truncated_files:
full_path = os.path.join(_TRUNCATED_FILES_DIR, file_path)
if full_path not in files_to_collect:
if os.path.isfile(full_path):
os.remove(full_path)
@staticmethod
def _expand_parameters(manifest_data):
_LOGGER.info("Using %s as $LIB_DIR", _AGENT_LIB_DIR)
_LOGGER.info("Using %s as $LOG_DIR", _EXTENSION_LOG_DIR)
_LOGGER.info("Using %s as $AGENT_LOG", _AGENT_LOG)
new_manifest = []
for line in manifest_data:
new_line = line.replace("$LIB_DIR", _AGENT_LIB_DIR)
new_line = new_line.replace("$LOG_DIR", _EXTENSION_LOG_DIR)
new_line = new_line.replace("$AGENT_LOG", _AGENT_LOG)
new_manifest.append(new_line)
return new_manifest
def _process_manifest_file(self):
files_to_collect = set()
data = self._read_manifest()
manifest_entries = LogCollector._expand_parameters(data)
for entry in manifest_entries:
# The entry can be one of the four flavours:
# 1) ll,/etc/udev/rules.d -- list out contents of the folder and store to results file
# 2) echo,### Gathering Configuration Files ### -- print message to results file
# 3) copy,/var/lib/waagent/provisioned -- add file to list of files to be collected
# 4) diskinfo, -- ignore commands from manifest other than ll, echo, and copy for now
contents = entry.split(",")
if len(contents) != 2:
# If it's not a comment or an empty line, it's a malformed entry
if not entry.startswith("#") and len(entry.strip()) > 0:
_LOGGER.error("Couldn't parse \"%s\"", entry)
continue
command, value = contents
if command == "ll":
self._process_ll_command(value)
elif command == "echo":
self._process_echo_command(value)
elif command == "copy":
files_to_collect.update(self._process_copy_command(value))
return files_to_collect
@staticmethod
def _truncate_large_file(file_path):
# Truncate a large file to the size limit (keeping the freshest entries of the file), copy it to a
# temporary location, and update the file path in the list of files to collect
try:
# Binary files cannot be truncated, don't include large binary files
ext = os.path.splitext(file_path)[1]
if ext in [".gz", ".zip", ".xz"]:
_LOGGER.warning("Discarding large binary file %s", file_path)
return None
truncated_file_path = os.path.join(_TRUNCATED_FILES_DIR, file_path.replace(os.path.sep, "_"))
if os.path.exists(truncated_file_path):
original_file_mtime = os.path.getmtime(file_path)
truncated_file_mtime = os.path.getmtime(truncated_file_path)
# If the original file hasn't been updated since the truncated file, it means there were no changes
# and we don't need to truncate it again.
if original_file_mtime < truncated_file_mtime:
return truncated_file_path
# Get the last N bytes of the file
with open(truncated_file_path, "w+") as fh:
LogCollector._run_shell_command(["tail", "-c", str(_FILE_SIZE_LIMIT), file_path], stdout=fh)
return truncated_file_path
except OSError as e:
_LOGGER.error("Failed to truncate large file: %s", ustr(e))
return None
def _get_file_priority(self, file_entry):
# The earlier the file appears in the must-collect list, the higher its priority.
# Lower numbers mean higher priority (0 is the highest priority).
try:
return self._must_collect_files.index(file_entry)
except ValueError:
# Doesn't matter, file is not in the must collect list, assign a low priority
return 999999999
def _get_priority_files_list(self, file_list):
# Given a list of files to collect, determine if they show up in the must collect list and build a priority
# queue. The queue will determine the order in which the files are collected, highest priority files first.
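# heapq orders tuples ascending, so the entry with the smallest priority number is popped first.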
priority_file_queue = []
for file_entry in file_list:
priority = self._get_file_priority(file_entry)
heappush(priority_file_queue, (priority, file_entry))
return priority_file_queue
def _get_final_list_for_archive(self, priority_file_queue):
# Given a priority queue of files to collect, add one by one while the archive size is under the size limit.
# If a single file is over the file size limit, truncate it before adding it to the archive.
_LOGGER.info("### Preparing list of files to add to archive ###")
total_uncompressed_size = 0
final_files_to_collect = []
while priority_file_queue:
file_path = heappop(priority_file_queue)[1] # (priority, file_path)
file_size = min(os.path.getsize(file_path), _FILE_SIZE_LIMIT)
if total_uncompressed_size + file_size > _UNCOMPRESSED_ARCHIVE_SIZE_LIMIT:
_LOGGER.warning("Archive too big, done with adding files.")
break
if os.path.getsize(file_path) <= _FILE_SIZE_LIMIT:
final_files_to_collect.append(file_path)
_LOGGER.info("Adding file %s, size %s b", file_path, file_size)
else:
truncated_file_path = self._truncate_large_file(file_path)
if truncated_file_path:
_LOGGER.info("Adding truncated file %s, size %s b", truncated_file_path, file_size)
final_files_to_collect.append(truncated_file_path)
total_uncompressed_size += file_size
_LOGGER.info("Uncompressed archive size is %s b", total_uncompressed_size)
return final_files_to_collect
def _create_list_of_files_to_collect(self):
# The final list of files to be collected by zip is created in three steps:
# 1) Parse given manifest file, expanding wildcards and keeping a list of files that exist on disk
# 2) Assign those files a priority depending on whether they are in the must collect file list.
# 3) In priority order, add files to the final list to be collected, until the size of the archive is under
# the size limit.
parsed_file_paths = self._process_manifest_file()
prioritized_file_paths = self._get_priority_files_list(parsed_file_paths)
files_to_collect = self._get_final_list_for_archive(prioritized_file_paths)
return files_to_collect
def collect_logs_and_get_archive(self):
"""
Public method that collects necessary log files in a compressed zip archive.
:return: Returns the path of the collected compressed archive
"""
files_to_collect = []
try:
# Clear previous run's output and create base directories if they don't exist already.
self._create_base_dirs()
LogCollector._reset_file(OUTPUT_RESULTS_FILE_PATH)
start_time = datetime.utcnow()
_LOGGER.info("Starting log collection at %s", start_time.strftime("%Y-%m-%dT%H:%M:%SZ"))
_LOGGER.info("Using log collection mode %s", "full" if self._is_full_mode else "normal")
files_to_collect = self._create_list_of_files_to_collect()
_LOGGER.info("### Creating compressed archive ###")
compressed_archive = None
try:
compressed_archive = zipfile.ZipFile(COMPRESSED_ARCHIVE_PATH, "w", compression=zipfile.ZIP_DEFLATED)
max_errors = 8
error_count = 0
for file_to_collect in files_to_collect:
try:
archive_file_name = LogCollector._convert_file_name_to_archive_name(file_to_collect)
compressed_archive.write(file_to_collect.encode("utf-8"), arcname=archive_file_name)
except Exception as e:
error_count += 1
if error_count >= max_errors:
raise Exception("Too many errors, giving up. Last error: {0}".format(ustr(e)))
else:
_LOGGER.warning("Failed to add file %s to the archive: %s", file_to_collect, ustr(e))
compressed_archive_size = os.path.getsize(COMPRESSED_ARCHIVE_PATH)
_LOGGER.info("Successfully compressed files. Compressed archive size is %s b", compressed_archive_size)
end_time = datetime.utcnow()
duration = end_time - start_time
elapsed_ms = int(((duration.days * 24 * 60 * 60 + duration.seconds) * 1000) + (duration.microseconds / 1000.0))
_LOGGER.info("Finishing log collection at %s", end_time.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
_LOGGER.info("Elapsed time: %s ms", elapsed_ms)
compressed_archive.write(OUTPUT_RESULTS_FILE_PATH.encode("utf-8"), arcname="results.txt")
finally:
if compressed_archive is not None:
compressed_archive.close()
return COMPRESSED_ARCHIVE_PATH
except Exception as e:
msg = "Failed to collect logs: {0}".format(ustr(e))
_LOGGER.error(msg)
raise
finally:
self._remove_uncollected_truncated_files(files_to_collect)
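# Illustrative usage of the collector above (the constructor signature is an assumption, not confirmed by this
# excerpt):
#
#   collector = LogCollector(is_full_mode=False)
#   archive_path = collector.collect_logs_and_get_archive()   # path to the compressed zip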
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/logcollector_manifests.py 0000664 0000000 0000000 00000006352 14626177470 0026465 0 ustar 00root root 0000000 0000000 # Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
MANIFEST_NORMAL = """echo,### Probing Directories ###
ll,/var/log
ll,$LIB_DIR
echo,### Gathering Configuration Files ###
copy,/etc/*-release
copy,/etc/HOSTNAME
copy,/etc/hostname
copy,/etc/waagent.conf
echo,
echo,### Gathering Log Files ###
copy,$AGENT_LOG*
copy,/var/log/dmesg*
copy,/var/log/syslog*
copy,/var/log/auth*
copy,$LOG_DIR/*/*
copy,$LOG_DIR/*/*/*
copy,$LOG_DIR/custom-script/handler.log
echo,
echo,### Gathering Extension Files ###
copy,$LIB_DIR/ovf-env.xml
copy,$LIB_DIR/waagent_status.json
copy,$LIB_DIR/*/status/*.status
copy,$LIB_DIR/*/config/*.settings
copy,$LIB_DIR/*/config/HandlerState
copy,$LIB_DIR/*/config/HandlerStatus
copy,$LIB_DIR/error.json
copy,$LIB_DIR/history/*.zip
echo,
"""
MANIFEST_FULL = """echo,### Probing Directories ###
ll,/var/log
ll,$LIB_DIR
ll,/etc/udev/rules.d
echo,### Gathering Configuration Files ###
copy,$LIB_DIR/provisioned
copy,/etc/fstab
copy,/etc/ssh/sshd_config
copy,/boot/grub*/grub.c*
copy,/boot/grub*/menu.lst
copy,/etc/*-release
copy,/etc/HOSTNAME
copy,/etc/hostname
copy,/etc/network/interfaces
copy,/etc/network/interfaces.d/*.cfg
copy,/etc/netplan/50-cloud-init.yaml
copy,/etc/nsswitch.conf
copy,/etc/resolv.conf
copy,/run/systemd/resolve/stub-resolv.conf
copy,/run/resolvconf/resolv.conf
copy,/etc/sysconfig/iptables
copy,/etc/sysconfig/network
copy,/etc/sysconfig/network/ifcfg-eth*
copy,/etc/sysconfig/network/routes
copy,/etc/sysconfig/network-scripts/ifcfg-eth*
copy,/etc/sysconfig/network-scripts/route-eth*
copy,/etc/sysconfig/SuSEfirewall2
copy,/etc/ufw/ufw.conf
copy,/etc/waagent.conf
copy,/var/lib/dhcp/dhclient.eth0.leases
copy,/var/lib/dhclient/dhclient-eth0.leases
copy,/var/lib/wicked/lease-eth0-dhcp-ipv4.xml
copy,/run/systemd/netif/leases/2
echo,
echo,### Gathering Log Files ###
copy,$AGENT_LOG*
copy,/var/log/syslog*
copy,/var/log/rsyslog*
copy,/var/log/messages*
copy,/var/log/kern*
copy,/var/log/dmesg*
copy,/var/log/dpkg*
copy,/var/log/yum*
copy,/var/log/cloud-init*
copy,/var/log/boot*
copy,/var/log/auth*
copy,/var/log/secure*
copy,$LOG_DIR/*/*
copy,$LOG_DIR/*/*/*
copy,$LOG_DIR/custom-script/handler.log
copy,$LOG_DIR/run-command/handler.log
echo,
echo,### Gathering Extension Files ###
copy,$LIB_DIR/ovf-env.xml
copy,$LIB_DIR/*/status/*.status
copy,$LIB_DIR/*/config/*.settings
copy,$LIB_DIR/*/config/HandlerState
copy,$LIB_DIR/*/config/HandlerStatus
copy,$LIB_DIR/SharedConfig.xml
copy,$LIB_DIR/ManagedIdentity-*.json
copy,$LIB_DIR/*/error.json
copy,$LIB_DIR/waagent_status.json
copy,$LIB_DIR/history/*.zip
echo,
echo,### Gathering Disk Info ###
diskinfo,
echo,### Gathering Guest ProxyAgent Log Files ###
copy,/var/log/azure-proxy-agent/*
echo,
"""
Azure-WALinuxAgent-2b21de5/azurelinuxagent/ga/monitor.py 0000664 0000000 0000000 00000031451 14626177470 0023411 0 ustar 00root root 0000000 0000000 # Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#
import datetime
import os
import threading
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.networkutil as networkutil
from azurelinuxagent.ga.cgroup import MetricValue, MetricsCategory, MetricsCounter
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.errorstate import ErrorState
from azurelinuxagent.common.event import add_event, WALAEventOperation, report_metric
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.interfaces import ThreadHandlerInterface
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.protocol.healthservice import HealthService
from azurelinuxagent.common.protocol.imds import get_imds_client
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.utils.restutil import IOErrorCounter
from azurelinuxagent.common.utils.textutil import hash_strings
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
from azurelinuxagent.ga.periodic_operation import PeriodicOperation
def get_monitor_handler():
return MonitorHandler()
class PollResourceUsage(PeriodicOperation):
"""
Periodic operation to poll the tracked cgroups for resource usage data.
It also checks whether there are processes in the agent's cgroup that should not be there.
"""
def __init__(self):
super(PollResourceUsage, self).__init__(conf.get_cgroup_check_period())
self.__log_metrics = conf.get_cgroup_log_metrics()
self.__periodic_metrics = {}
def _operation(self):
tracked_metrics = CGroupsTelemetry.poll_all_tracked()
for metric in tracked_metrics:
key = metric.category + metric.counter + metric.instance
if key not in self.__periodic_metrics or (self.__periodic_metrics[key] + metric.report_period) <= datetime.datetime.now():
report_metric(metric.category, metric.counter, metric.instance, metric.value, log_event=self.__log_metrics)
self.__periodic_metrics[key] = datetime.datetime.now()
CGroupConfigurator.get_instance().check_cgroups(tracked_metrics)
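# Illustrative note on the rate limiting in PollResourceUsage._operation above: each metric is keyed by the
# concatenation category + counter + instance (e.g. "Memory" + "Max Memory Usage" + "walinuxagent.service";
# the concrete values are examples, not taken from this excerpt), and a metric is re-reported only once its
# report_period has elapsed since the previous report.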
class PollSystemWideResourceUsage(PeriodicOperation):
def __init__(self):
super(PollSystemWideResourceUsage, self).__init__(datetime.timedelta(hours=1))
self.__log_metrics = conf.get_cgroup_log_metrics()
self.osutil = get_osutil()
def poll_system_memory_metrics(self):
used_mem, available_mem = self.osutil.get_used_and_available_system_memory()
return [
MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.USED_MEM, "",
used_mem),
MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.AVAILABLE_MEM, "",
available_mem)
]
def _operation(self):
metrics = self.poll_system_memory_metrics()
for metric in metrics:
report_metric(metric.category, metric.counter, metric.instance, metric.value, log_event=self.__log_metrics)
class ResetPeriodicLogMessages(PeriodicOperation):
"""
    Periodic operation to clean up the hash tables maintained by the loggers. For reference, see the
    azurelinuxagent.common.logger.Logger and azurelinuxagent.common.event.EventLogger classes.
"""
def __init__(self):
super(ResetPeriodicLogMessages, self).__init__(datetime.timedelta(hours=12))
def _operation(self):
logger.reset_periodic()
class ReportNetworkErrors(PeriodicOperation):
def __init__(self):
super(ReportNetworkErrors, self).__init__(datetime.timedelta(minutes=30))
def _operation(self):
io_errors = IOErrorCounter.get_and_reset()
hostplugin_errors = io_errors.get("hostplugin")
protocol_errors = io_errors.get("protocol")
other_errors = io_errors.get("other")
if hostplugin_errors > 0 or protocol_errors > 0 or other_errors > 0:
msg = "hostplugin:{0};protocol:{1};other:{2}".format(hostplugin_errors, protocol_errors, other_errors)
add_event(op=WALAEventOperation.HttpErrors, message=msg)
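# The classes in this module follow the same PeriodicOperation pattern: pass the desired interval to the base
# constructor and implement _operation(). A minimal sketch (the class name and interval are illustrative):
#
#   class ExampleOperation(PeriodicOperation):
#       def __init__(self):
#           super(ExampleOperation, self).__init__(datetime.timedelta(minutes=5))
#
#       def _operation(self):
#           pass  # periodic work goes here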
class ReportNetworkConfigurationChanges(PeriodicOperation):
"""
Periodic operation to check and log changes in network configuration.
"""
def __init__(self):
super(ReportNetworkConfigurationChanges, self).__init__(datetime.timedelta(minutes=1))
self.osutil = get_osutil()
self.last_route_table_hash = b''
self.last_nic_state = {}
def log_network_configuration(self):
try:
route_file = '/proc/net/route'
if os.path.exists(route_file):
lines = []
with open(route_file) as file_object:
for line in file_object:
lines.append(line)
if len(lines) >= 100:
lines.append("