360 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			360 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| /*
 | |
|  * Copyright 2007 ZXing authors
 | |
|  *
 | |
|  * Licensed under the Apache License, Version 2.0 (the "License");
 | |
|  * you may not use this file except in compliance with the License.
 | |
|  * You may obtain a copy of the License at
 | |
|  *
 | |
|  *      http://www.apache.org/licenses/LICENSE-2.0
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an "AS IS" BASIS,
 | |
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|  * See the License for the specific language governing permissions and
 | |
|  * limitations under the License.
 | |
|  */
 | |
| 
 | |
| namespace Zxing\Qrcode\Decoder;
 | |
| 
 | |
| use Zxing\DecodeHintType;
 | |
| use Zxing\FormatException;
 | |
| use Zxing\Common\BitSource;
 | |
| use Zxing\Common\CharacterSetECI;
 | |
| use Zxing\Common\DecoderResult;
 | |
| use Zxing\Common\StringUtils;
 | |
| 
 | |
| 
 | |
| 
 | |
/**
 * <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
 * in one QR Code. This class decodes the bits back into text.</p>
 *
 * <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
 *
 * All methods are static; the class is not meant to be instantiated.
 *
 * @author Sean Owen
 */
final class DecodedBitStreamParser {

    /**
     * Character table for alphanumeric mode, indexed by the encoded value (0..44).
     * See ISO 18004:2006, 6.4.4 Table 5
     */
    private static $ALPHANUMERIC_CHARS = array(
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
        'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        ' ', '$', '%', '*', '+', '-', '.', '/', ':'
    );

    /**
     * Hanzi subset indicator value for which decode() actually decodes the segment.
     */
    private static $GB2312_SUBSET = 1;

    /**
     * Private constructor: the class only exposes static methods.
     *
     * A method named after the class is not a constructor in a namespaced class,
     * so the magic name is required for the restriction to take effect.
     */
    private function __construct() {
    }

    /**
     * Walks the segments of a QR code bit stream and assembles the decoded text.
     *
     * Reads a 4-bit mode indicator per segment until a terminator is found or fewer
     * than 4 bits remain, dispatching each segment to the matching decoder.
     *
     * @param array $bytes   raw codewords; wrapped in a BitSource and passed through to the result
     * @param mixed $version QR version, used only to look up the character-count field width
     * @param mixed $ecLevel error correction level (see the review note at the return statement)
     * @param mixed $hints   decode hints, forwarded to the byte-segment decoder
     *
     * @return DecoderResult text, byte segments (null when none), EC level,
     *                       structured-append sequence number and parity (-1 when absent)
     *
     * @throws FormatException when the stream is truncated or contains an invalid segment
     */
    public static function decode($bytes,
                                  $version,
                                  $ecLevel,
                                  $hints) {
        $bits = new BitSource($bytes);
        $result = '';
        $byteSegments = array();
        $symbolSequence = -1;
        $parityData = -1;

        try {
            $currentCharacterSetECI = null;
            $fc1InEffect = false;
            do {
                // While still another segment to read...
                if ($bits->available() < 4) {
                    // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
                    $mode = Mode::$TERMINATOR;
                } else {
                    $mode = Mode::forBits($bits->readBits(4)); // mode is encoded by 4 bits
                }

                if ($mode == Mode::$TERMINATOR) {
                    continue; // falls through to the loop condition, which ends the loop
                }

                if ($mode == Mode::$FNC1_FIRST_POSITION || $mode == Mode::$FNC1_SECOND_POSITION) {
                    // We do little with FNC1 except alter the parsed result a bit according to the spec
                    $fc1InEffect = true;
                } else if ($mode == Mode::$STRUCTURED_APPEND) {
                    if ($bits->available() < 16) {
                        throw FormatException::getFormatInstance();
                    }
                    // Sequence number and parity are added later to the result metadata:
                    // read 8 bits (symbol sequence #) and 8 bits (parity data), then continue
                    $symbolSequence = $bits->readBits(8);
                    $parityData = $bits->readBits(8);
                } else if ($mode == Mode::$ECI) {
                    // Count doesn't apply to ECI
                    $value = self::parseECIValue($bits);
                    $currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue($value);
                    if ($currentCharacterSetECI == null) {
                        throw FormatException::getFormatInstance();
                    }
                } else if ($mode == Mode::$HANZI) {
                    // Hanzi mode carries a 4-bit subset indicator right after the mode
                    // indicator, before the character count
                    $subset = $bits->readBits(4);
                    $countHanzi = $bits->readBits($mode->getCharacterCountBits($version));
                    if ($subset == self::$GB2312_SUBSET) {
                        self::decodeHanziSegment($bits, $result, $countHanzi);
                    }
                } else {
                    // "Normal" QR code modes:
                    // How many characters will follow, encoded in this mode?
                    $count = $bits->readBits($mode->getCharacterCountBits($version));
                    if ($mode == Mode::$NUMERIC) {
                        self::decodeNumericSegment($bits, $result, $count);
                    } else if ($mode == Mode::$ALPHANUMERIC) {
                        self::decodeAlphanumericSegment($bits, $result, $count, $fc1InEffect);
                    } else if ($mode == Mode::$BYTE) {
                        self::decodeByteSegment($bits, $result, $count, $currentCharacterSetECI, $byteSegments, $hints);
                    } else if ($mode == Mode::$KANJI) {
                        self::decodeKanjiSegment($bits, $result, $count);
                    } else {
                        throw FormatException::getFormatInstance();
                    }
                }
            } while ($mode != Mode::$TERMINATOR);
        } catch (IllegalArgumentException $iae) {
            // Kept from the original: this name resolves inside the current namespace.
            throw FormatException::getFormatInstance();
        } catch (\InvalidArgumentException $iae) {
            // NOTE(review): assumes readBits() reports an invalid read with PHP's built-in
            // \InvalidArgumentException -- confirm against BitSource.
            throw FormatException::getFormatInstance();
        }

        // NOTE(review): the EC level is reported as the literal 'L' whenever $ecLevel is
        // non-null; the original left ErrorCorrectionLevel::toString($ecLevel) commented out.
        return new DecoderResult($bytes,
            $result,
            empty($byteSegments) ? null : $byteSegments,
            $ecLevel == null ? null : 'L',
            $symbolSequence,
            $parityData);
    }

    /**
     * Converts a raw multi-byte string to UTF-8.
     *
     * @param string $raw     bytes in the source encoding
     * @param string $charset iconv name of the source encoding
     *
     * @return string the UTF-8 text ('' for empty input)
     *
     * @throws FormatException when iconv() cannot convert the bytes
     */
    private static function convertToUtf8($raw, $charset) {
        if ($raw === '') {
            return '';
        }
        // iconv() reports failure by returning false, not by throwing
        $converted = iconv($charset, 'UTF-8', $raw);
        if ($converted === false) {
            throw FormatException::getFormatInstance();
        }
        return $converted;
    }

    /**
     * Decodes a Hanzi segment (13 bits per character, GB2312) and appends it as UTF-8.
     *
     * See specification GBT 18284-2000
     *
     * @param object $bits   bit reader providing available() and readBits()
     * @param string $result decoded text so far; appended to in place
     * @param int    $count  number of characters in the segment
     *
     * @throws FormatException when too few bits remain or the bytes cannot be converted
     */
    private static function decodeHanziSegment($bits,
                                               &$result,
                                               $count) {
        // Don't crash trying to read more bits than we have available.
        if ($count * 13 > $bits->available()) {
            throw FormatException::getFormatInstance();
        }

        // Each character requires 2 bytes; collect them as a binary string
        // and decode as GB2312 afterwards
        $buffer = '';
        while ($count > 0) {
            // Each 13 bits encodes a 2-byte character
            $twoBytes = $bits->readBits(13);
            $assembledTwoBytes = (((int) ($twoBytes / 0x060)) << 8) | ($twoBytes % 0x060);
            // The first range (0xA1A1..0xAAFE) maps to offsets 0x0000..0x095D and the
            // second (0xB0A1..0xFAFE) to 0x0A00 and up, so 0x0A00 is the dividing value.
            if ($assembledTwoBytes < 0x00A00) {
                // In the 0xA1A1 to 0xAAFE range
                $assembledTwoBytes += 0x0A1A1;
            } else {
                // In the 0xB0A1 to 0xFAFE range
                $assembledTwoBytes += 0x0A6A1;
            }
            $buffer .= chr(($assembledTwoBytes >> 8) & 0xFF) . chr($assembledTwoBytes & 0xFF);
            $count--;
        }

        $result .= self::convertToUtf8($buffer, 'GB2312');
    }

    /**
     * Decodes a Kanji segment (13 bits per character, Shift_JIS) and appends it as UTF-8.
     *
     * @param object $bits   bit reader providing available() and readBits()
     * @param string $result decoded text so far; appended to in place
     * @param int    $count  number of characters in the segment
     *
     * @throws FormatException when too few bits remain or the bytes cannot be converted
     */
    private static function decodeKanjiSegment($bits,
                                               &$result,
                                               $count) {
        // Don't crash trying to read more bits than we have available.
        if ($count * 13 > $bits->available()) {
            throw FormatException::getFormatInstance();
        }

        // Each character requires 2 bytes; collect them as a binary string
        // and decode as Shift_JIS afterwards
        $buffer = '';
        while ($count > 0) {
            // Each 13 bits encodes a 2-byte character
            $twoBytes = $bits->readBits(13);
            $assembledTwoBytes = (((int) ($twoBytes / 0x0C0)) << 8) | ($twoBytes % 0x0C0);
            if ($assembledTwoBytes < 0x01F00) {
                // In the 0x8140 to 0x9FFC range
                $assembledTwoBytes += 0x08140;
            } else {
                // In the 0xE040 to 0xEBBF range
                $assembledTwoBytes += 0x0C140;
            }
            $buffer .= chr(($assembledTwoBytes >> 8) & 0xFF) . chr($assembledTwoBytes & 0xFF);
            $count--;
        }

        // Shift_JIS may not be supported in some environments; that surfaces as a FormatException
        $result .= self::convertToUtf8($buffer, 'shift-jis');
    }

    /**
     * Reads a byte-mode segment and appends its bytes, unconverted, to the result.
     *
     * The spec (section 6.4.5) does not say which encoding to assume for this mode;
     * no charset conversion is applied here, so $currentCharacterSetECI and $hints are
     * accepted for signature compatibility but currently unused.
     *
     * @param object $bits                   bit reader providing available() and readBits()
     * @param string $result                 decoded text so far; appended to in place
     * @param int    $count                  number of bytes in the segment
     * @param mixed  $currentCharacterSetECI ECI charset in effect, or null (unused)
     * @param array  $byteSegments           flat list of all byte values read so far; extended in place
     * @param mixed  $hints                  decode hints (unused)
     *
     * @throws FormatException when fewer than 8 * $count bits remain
     */
    private static function decodeByteSegment($bits,
                                              &$result,
                                              $count,
                                              $currentCharacterSetECI,
                                              &$byteSegments,
                                              $hints) {
        // Don't crash trying to read more bits than we have available.
        if (8 * $count > $bits->available()) {
            throw FormatException::getFormatInstance();
        }

        $readBytes = array();
        $text = '';
        for ($i = 0; $i < $count; $i++) {
            $byte = $bits->readBits(8);
            $readBytes[] = $byte;
            $text .= chr($byte);
        }

        $result .= $text;
        $byteSegments = array_merge($byteSegments, $readBytes);
    }

    /**
     * Maps an alphanumeric-mode value to its character.
     *
     * @param int|float $value table index; a fractional value is truncated
     *
     * @return string the single character at that index
     *
     * @throws FormatException when the value is outside the table
     */
    private static function toAlphaNumericChar($value) {
        $index = (int) $value;
        if ($index < 0 || $index >= count(self::$ALPHANUMERIC_CHARS)) {
            throw FormatException::getFormatInstance();
        }
        return self::$ALPHANUMERIC_CHARS[$index];
    }

    /**
     * Decodes an alphanumeric segment: 11 bits per character pair, 6 bits for a trailing
     * single character. When FNC1 is in effect, "%%" collapses to "%" and a lone "%"
     * becomes the separator 0x1D within the text this segment appended.
     *
     * @param object $bits        bit reader providing available() and readBits()
     * @param string $result      decoded text so far; appended to in place
     * @param int    $count       number of characters in the segment
     * @param bool   $fc1InEffect whether an FNC1 mode indicator was seen earlier
     *
     * @throws FormatException when too few bits remain or a value is outside the table
     */
    private static function decodeAlphanumericSegment($bits,
                                                      &$result,
                                                      $count,
                                                      $fc1InEffect) {
        // Read two characters at a time
        $start = strlen($result);
        while ($count > 1) {
            if ($bits->available() < 11) {
                throw FormatException::getFormatInstance();
            }
            $nextTwoCharsBits = $bits->readBits(11);
            $result .= self::toAlphaNumericChar((int) ($nextTwoCharsBits / 45));
            $result .= self::toAlphaNumericChar($nextTwoCharsBits % 45);
            $count -= 2;
        }
        if ($count == 1) {
            // special case: one character left
            if ($bits->available() < 6) {
                throw FormatException::getFormatInstance();
            }
            $result .= self::toAlphaNumericChar($bits->readBits(6));
        }
        // See section 6.4.8.1, 6.4.8.2
        if ($fc1InEffect) {
            // We need to massage the result a bit if in an FNC1 mode.
            // strlen() is re-evaluated each pass because the string may shrink.
            for ($i = $start; $i < strlen($result); $i++) {
                if ($result[$i] != '%') {
                    continue;
                }
                if ($i < strlen($result) - 1 && $result[$i + 1] == '%') {
                    // %% is rendered as %
                    $result = substr_replace($result, '', $i + 1, 1);
                } else {
                    // In alpha mode, % should be converted to FNC1 separator 0x1D
                    $result[$i] = chr(0x1D);
                }
            }
        }
    }

    /**
     * Decodes a numeric segment: 10 bits per three digits, then 7 bits for two
     * leftover digits or 4 bits for one.
     *
     * @param object $bits   bit reader providing available() and readBits()
     * @param string $result decoded text so far; appended to in place
     * @param int    $count  number of digits in the segment
     *
     * @throws FormatException when too few bits remain or a group exceeds its decimal range
     */
    private static function decodeNumericSegment($bits,
                                                 &$result,
                                                 $count) {
        // Read three digits at a time
        while ($count >= 3) {
            // Each 10 bits encodes three digits
            if ($bits->available() < 10) {
                throw FormatException::getFormatInstance();
            }
            $threeDigitsBits = $bits->readBits(10);
            if ($threeDigitsBits >= 1000) {
                throw FormatException::getFormatInstance();
            }
            $result .= self::toAlphaNumericChar((int) ($threeDigitsBits / 100));
            $result .= self::toAlphaNumericChar(((int) ($threeDigitsBits / 10)) % 10);
            $result .= self::toAlphaNumericChar($threeDigitsBits % 10);
            $count -= 3;
        }
        if ($count == 2) {
            // Two digits left over to read, encoded in 7 bits
            if ($bits->available() < 7) {
                throw FormatException::getFormatInstance();
            }
            $twoDigitsBits = $bits->readBits(7);
            if ($twoDigitsBits >= 100) {
                throw FormatException::getFormatInstance();
            }
            $result .= self::toAlphaNumericChar((int) ($twoDigitsBits / 10));
            $result .= self::toAlphaNumericChar($twoDigitsBits % 10);
        } else if ($count == 1) {
            // One digit left over to read
            if ($bits->available() < 4) {
                throw FormatException::getFormatInstance();
            }
            $digitBits = $bits->readBits(4);
            if ($digitBits >= 10) {
                throw FormatException::getFormatInstance();
            }
            $result .= self::toAlphaNumericChar($digitBits);
        }
    }

    /**
     * Reads an ECI designator of one, two or three bytes; the leading bits of the
     * first byte (0, 10 or 110) select the length.
     *
     * @param object $bits bit reader providing readBits()
     *
     * @return int the ECI value
     *
     * @throws FormatException when the first byte matches none of the three prefixes
     */
    private static function parseECIValue($bits) {
        $firstByte = $bits->readBits(8);
        if (($firstByte & 0x80) == 0) {
            // just one byte
            return $firstByte & 0x7F;
        }
        if (($firstByte & 0xC0) == 0x80) {
            // two bytes
            $secondByte = $bits->readBits(8);
            return (($firstByte & 0x3F) << 8) | $secondByte;
        }
        if (($firstByte & 0xE0) == 0xC0) {
            // three bytes
            $secondThirdBytes = $bits->readBits(16);
            return (($firstByte & 0x1F) << 16) | $secondThirdBytes;
        }
        throw FormatException::getFormatInstance();
    }

}
 |