| // Copyright 2008 Google Inc. All Rights Reserved. |
| |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // error_diag.h: Ambiguous error diagnosis class |
| |
| #ifndef STRESSAPPTEST_ERROR_DIAG_H_ |
| #define STRESSAPPTEST_ERROR_DIAG_H_ |
| |
| #include <pthread.h> |
| #include <list> |
| #include <map> |
| #include <set> |
| #include <string> |
| |
| // This file must work with autoconf on its public version, |
| // so these includes are correct. |
| #include "sattypes.h" |
| #include "os.h" |
| |
| class ErrorInstance; |
| |
| // This describes the components of the system. |
| class DeviceTree { |
| public: |
| explicit DeviceTree(string name); |
| ~DeviceTree(); |
| |
| // Atomically find arbitrary device in subtree. |
| DeviceTree *FindInSubTree(string name); |
| // Find or add named device. |
| DeviceTree *FindOrAddDevice(string name); |
| // Atomically add sub device. |
| void InsertSubDevice(string name); |
| // Returns parent device. |
| DeviceTree *GetParent() { return parent_; } |
| // Pretty prints device tree. |
| void PrettyPrint(string spacer = " "); |
| // Atomically add error instance to device. |
| void AddErrorInstance(ErrorInstance *error_instance); |
| // Returns true of device is known to be bad. |
| bool KnownBad(); |
| // Returns number of direct sub devices. |
| int NumDirectSubDevices() { return subdevices_.size(); } |
| |
| private: |
| // Unlocked version of FindInSubTree. |
| DeviceTree *UnlockedFindInSubTree(string name); |
| |
| std::map<string, DeviceTree*> subdevices_; // Map of sub-devices. |
| std::list<ErrorInstance*> errors_; // Log of errors. |
| DeviceTree *parent_; // Pointer to parent device. |
| string name_; // Device name. |
| pthread_mutex_t device_tree_mutex_; // Mutex protecting device tree. |
| }; |
| |
| |
// Kinds of error that can be collected. Values are consecutive from zero.
enum SATErrorType {
  SAT_ERROR_NONE       = 0,  // No error type assigned.
  SAT_ERROR_ECC        = 1,  // ECC error.
  SAT_ERROR_MISCOMPARE = 2,  // Miscompare error.
  SAT_ERROR_SECTOR_TAG = 3   // Sector tag error.
};
| |
// Severity attached to a collected error.
enum SATErrorSeverity {
  SAT_ERROR_CORRECTABLE = 0,  // Correctable error.
  SAT_ERROR_FATAL       = 1   // Fatal error.
};
| |
| // This describes an error and it's likely causes. |
| class ErrorInstance { |
| public: |
| ErrorInstance(): type_(SAT_ERROR_NONE), severity_(SAT_ERROR_CORRECTABLE) {} |
| |
| SATErrorType type_; // Type of error: ECC, miscompare, sector. |
| SATErrorSeverity severity_; // Correctable, or fatal. |
| std::set<DeviceTree*> causes_; // Devices that can cause this type of error. |
| }; |
| |
| // This describes ECC errors. |
| class ECCErrorInstance: public ErrorInstance { |
| public: |
| ECCErrorInstance() { type_ = SAT_ERROR_ECC; } |
| |
| uint64 addr_; // Address where error occured. |
| }; |
| |
| // This describes miscompare errors. |
| class MiscompareErrorInstance: public ErrorInstance { |
| public: |
| MiscompareErrorInstance() { type_ = SAT_ERROR_MISCOMPARE; } |
| |
| uint64 addr_; // Address where miscompare occured. |
| }; |
| |
| // This describes HDD miscompare errors. |
| class HDDMiscompareErrorInstance: public MiscompareErrorInstance { |
| public: |
| uint64 addr2_; // addr_ and addr2_ are src and dst memory addr. |
| int offset_; // offset. |
| int block_; // error block. |
| }; |
| |
| // This describes HDD miscompare errors. |
| class HDDSectorTagErrorInstance: public ErrorInstance { |
| public: |
| HDDSectorTagErrorInstance() { type_ = SAT_ERROR_SECTOR_TAG; } |
| |
| uint64 addr_; |
| uint64 addr2_; // addr_ and addr2_ are src and dst memory addr. |
| int sector_; // error sector. |
| int block_; // error block. |
| }; |
| |
| // Generic error storage and sorting class. |
| class ErrorDiag { |
| public: |
| ErrorDiag(); |
| virtual ~ErrorDiag(); |
| |
| // Add info about a CECC. |
| virtual int AddCeccError(string dimm_string); |
| |
| // Add info about a UECC. |
| virtual int AddUeccError(string dimm_string); |
| |
| // Add info about a miscompare. |
| virtual int AddMiscompareError(string dimm_string, uint64 addr, int count); |
| |
| // Add info about a miscompare from a drive. |
| virtual int AddHDDMiscompareError(string devicename, int block, int offset, |
| void *src_addr, void *dst_addr); |
| |
| // Add info about a sector tag miscompare from a drive. |
| virtual int AddHDDSectorTagError(string devicename, int block, int offset, |
| int sector, void *src_addr, void *dst_addr); |
| |
| // Set platform specific handle and initialize device tree. |
| bool set_os(OsLayer *os); |
| |
| protected: |
| // Create and initialize system device tree. |
| virtual bool InitializeDeviceTree(); |
| |
| // Utility Function to translate a virtual address to DIMM number. |
| string AddressToDimmString(OsLayer *os, void *addr, int offset); |
| |
| DeviceTree *system_tree_root_; // System device tree. |
| OsLayer *os_; // Platform handle. |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(ErrorDiag); |
| }; |
| |
| #endif // STRESSAPPTEST_ERROR_DIAG_H_ |